redcodegen 0.0.3__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of redcodegen might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: redcodegen
- Version: 0.0.3
+ Version: 0.0.4
  Summary: Add your description here
  Requires-Dist: click>=8.0.0
  Requires-Dist: cwe2>=3.0.0
@@ -1,6 +1,6 @@
  [project]
  name = "redcodegen"
- version = "0.0.3"
+ version = "0.0.4"
  description = "Add your description here"
  readme = "README.md"
  requires-python = ">=3.11"
@@ -0,0 +1,263 @@
+ """
+ main.py
+ Main script for generating and evaluating vulnerable code samples
+ """
+
+ import rich_click as click
+ import jsonlines
+ import logging
+ import dspy
+ from datetime import datetime
+ from pathlib import Path
+ from typing import List, Set, Dict, Any
+ from cwe2.database import Database
+
+ from redcodegen.constants import CWE_TOP_25, create_lm
+
+ from rich.logging import RichHandler
+
+ # Setup logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(message)s",
+     handlers=[RichHandler(rich_tracebacks=True)]
+ )
+ logger = logging.getLogger(__name__)
+
+
+ def load_completed_cwes(output_path: Path) -> Set[int]:
+     """Load CWE IDs that have already been processed.
+
+     Args:
+         output_path: Path to the output JSONL file
+
+     Returns:
+         Set of CWE IDs that are already in the output file
+     """
+     completed = set()
+
+     if not output_path.exists():
+         return completed
+
+     try:
+         with jsonlines.open(output_path) as reader:
+             for record in reader:
+                 if 'cwe_id' in record:
+                     completed.add(record['cwe_id'])
+         logger.info(f"Found {len(completed)} already-completed CWEs in {output_path}")
+     except Exception as e:
+         logger.warning(f"Could not read existing output file: {e}")
+
+     return completed
+
+
+ def get_model_config() -> Dict[str, Any]:
+     """Extract model configuration from current DSPy settings.
+
+     Returns:
+         Dict with model configuration info
+     """
+     lm = dspy.settings.lm
+     config = {
+         "model": getattr(lm, 'model', 'unknown'),
+     }
+
+     return config
+
+
+ def build_record(
+     cwe_id: int,
+     cwe_name: str,
+     cwe_description: str,
+     scenarios: List[str],
+     codes: List[str],
+     evaluations: List[Any],
+     errors: List[str],
+     min_scenarios: int
+ ) -> Dict[str, Any]:
+     """Build a record for JSONL output.
+
+     Args:
+         cwe_id: CWE identifier
+         cwe_name: CWE name
+         cwe_description: CWE description
+         scenarios: List of scenario descriptions
+         codes: List of generated code samples
+         evaluations: List of evaluation results (can contain None for failures)
+         errors: List of error messages (None for successful evaluations)
+         min_scenarios: Minimum scenarios parameter used
+
+     Returns:
+         Dict representing the complete record for this CWE
+     """
+     samples = []
+     for scenario, code, evaluation, error in zip(scenarios, codes, evaluations, errors):
+         samples.append({
+             "scenario": scenario,
+             "code": code,
+             "evaluation": evaluation
+         })
+
+     return {
+         "cwe_id": cwe_id,
+         "cwe_name": cwe_name,
+         "cwe_description": cwe_description,
+         "timestamp": datetime.utcnow().isoformat() + 'Z',
+         "model_config": get_model_config(),
+         "min_scenarios": min_scenarios,
+         "samples": samples
+     }
+
+
+ def append_to_jsonl(record: Dict[str, Any], output_path: Path):
+     """Append a record to the JSONL file.
+
+     Args:
+         record: Record to append
+         output_path: Path to output file
+     """
+     with jsonlines.open(output_path, mode='a') as writer:
+         writer.write(record)
+     logger.info(f"Saved CWE-{record['cwe_id']} to {output_path}")
+
+
+ @click.command()
+ @click.option(
+     '--cwes', '-c',
+     multiple=True,
+     type=int,
+     help='CWE IDs to process (can specify multiple times, e.g., -c 89 -c 79)'
+ )
+ @click.option(
+     '--use-top-25',
+     is_flag=True,
+     help='Process all CWE Top 25'
+ )
+ @click.option(
+     '--min-samples', '-n',
+     default=3,
+     type=int,
+     help='Minimum samples per CWE (default: 3)'
+ )
+ @click.option(
+     '--output', '-o',
+     default='results.jsonl',
+     type=click.Path(),
+     help='Output JSONL file (default: results.jsonl)'
+ )
+ @click.option(
+     '--model', '-m',
+     default='openai/gpt-4o-mini',
+     help='Model identifier (default: openai/gpt-4o-mini)'
+ )
+ @click.option(
+     '--api-key',
+     default=None,
+     help='API key (defaults to OPENAI_API_KEY env var)'
+ )
+ @
+ def main(cwes, use_top_25, min_samples, output, model, api_key):
+     """Generate and evaluate vulnerable code samples for specified CWEs.
+
+     Examples:
+         python -m redcodegen -c 89 -c 79 # manually specify cwe
+         python -m redcodegen -n 5 # specify number of rollouts
+         python -m redcodegen --use-top-25 # run CWE top 25
+         python -m redcodegen --use-top-25 -o results.jsonl # resume existing run
+         python -m redcodegen --use-top-25 --model openai/gpt-4o # switch model
+     """
+     # Configure DSPy with specified model
+     lm = create_lm(model_name=model, api_key=api_key)
+     dspy.configure(lm=lm)
+     logger.info(f"Configured model: {model}")
+
+     # Import generator and validator after configuring dspy
+     from redcodegen.generator import run_cwe
+     from redcodegen.validator import evaluate
+
+     output_path = Path(output)
+
+     # Determine which CWEs to process
+     if use_top_25:
+         cwes_to_process = CWE_TOP_25
+         logger.info(f"Processing CWE Top 25 ({len(cwes_to_process)} CWEs)")
+     elif cwes:
+         cwes_to_process = list(cwes)
+         logger.info(f"Processing {len(cwes_to_process)} specified CWEs")
+     else:
+         logger.error("Must specify either --cwes or --use-top-25")
+         raise click.UsageError("Must specify either --cwes or --use-top-25")
+
+     # Load already-completed CWEs for idempotency
+     completed_cwes = load_completed_cwes(output_path)
+     cwes_to_process = [cwe for cwe in cwes_to_process if cwe not in completed_cwes]
+
+     if not cwes_to_process:
+         logger.info("All CWEs already completed!")
+         return
+
+     logger.info(f"Processing {len(cwes_to_process)} CWEs (skipped {len(completed_cwes)} already completed)")
+
+     # Initialize CWE database
+     db = Database()
+
+     # Process each CWE
+     for idx, cwe_id in enumerate(cwes_to_process, 1):
+         logger.info(f"[{idx}/{len(cwes_to_process)}] Processing CWE-{cwe_id}...")
+
+         try:
+             # Get CWE metadata
+             entry = db.get(cwe_id)
+             cwe_name = entry.name
+             cwe_description = entry.extended_description or entry.description
+
+             # Generate code samples
+             logger.info(f" Generating {min_samples} code samples...")
+             codes = run_cwe(cwe_id, min_scenarios=min_samples)
+             logger.info(f" Generated {len(codes)} code samples")
+
+             # Get scenarios (need to call generate again to get scenarios)
+             from redcodegen.scenarios import generate
+             scenario_data = generate(cwe_id, min_scenarios=min_samples)
+             scenarios = scenario_data["scenarios"][:len(codes)]  # Match code count
+
+             # Evaluate each code sample
+             evaluations = []
+             errors = []
+
+             for i, code in enumerate(codes, 1):
+                 logger.info(f" Evaluating sample {i}/{len(codes)}...")
+                 try:
+                     evaluation = evaluate(code)
+                     evaluations.append(evaluation)
+                     errors.append(None)
+                     logger.info(f" Found {len(evaluation)} vulnerabilities")
+                 except Exception as e:
+                     logger.warning(f" Evaluation failed: {e}")
+                     evaluations.append(None)
+                     errors.append(str(e))
+
+             # Build and save record
+             record = build_record(
+                 cwe_id=cwe_id,
+                 cwe_name=cwe_name,
+                 cwe_description=cwe_description,
+                 scenarios=scenarios,
+                 codes=codes,
+                 evaluations=evaluations,
+                 errors=errors,
+                 min_scenarios=min_samples
+             )
+
+             append_to_jsonl(record, output_path)
+             logger.info(f"✓ Completed CWE-{cwe_id}")
+
+         except Exception as e:
+             logger.error(f"✗ Failed to process CWE-{cwe_id}: {e}")
+             continue
+
+     logger.info(f"Completed! Results saved to {output_path}")
+
+
+ if __name__ == '__main__':
+     main()
@@ -0,0 +1,17 @@
+ import dspy
+ import jsonlines
+ from cwe2.database import Database
+
+ from redcodegen.constants import LM
+
+ dspy.configure(lm=LM)
+
+
+ db = Database()
+ entry = db.get(502)
+
+ print(entry.name)
+ print(entry.extended_description)
+
+
+
@@ -48,7 +48,7 @@ def generate(cwe_id, min_scenarios=3):
      output_scenarios = []
      while len(output_scenarios) < min_scenarios:
          scenarios = extract_scenarios(name=entry.name, description=entry.extended_description,
-              config={"temperature": 0.8, "rollout_id": len(output_scenarios)}).scenarios
+              config={"rollout_id": len(output_scenarios)}).scenarios
          output_scenarios.extend(scenarios)
      scenarios = [strip_vulnerability(scenario=i).coding_task for i in output_scenarios]
      suggestions = [suggest_libraries(task=i, suggested_libraries=CODEQL_LIBRARIES) for i in scenarios]