PyPI - redcodegen - Versions diffs - 0.0.4__tar.gz → 0.0.5__tar.gz - Mend

redcodegen 0.0.4.tar.gz → 0.0.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of redcodegen might be problematic. Click here for more details.

Files changed (14)

{redcodegen-0.0.4 → redcodegen-0.0.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: redcodegen
-Version: 0.0.4
+Version: 0.0.5
 Summary: Add your description here
 Requires-Dist: click>=8.0.0
 Requires-Dist: cwe2>=3.0.0

{redcodegen-0.0.4 → redcodegen-0.0.5}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "redcodegen"
-version = "0.0.4"
+version = "0.0.5"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.11"

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/constants.py RENAMED Viewed

@@ -11,7 +11,7 @@ CWE_TOP_25 = [
     798, 190, 400, 306
 ]
-def create_lm(model_name="openai/gpt-4o-mini", temperature=0.8, api_key=None):
+def create_lm(model_name="openai/gpt-4o-mini", temperature=0.8, api_key=None, api_base=None):
     """Create a DSPy language model instance.
     Args:
@@ -23,12 +23,24 @@ def create_lm(model_name="openai/gpt-4o-mini", temperature=0.8, api_key=None):
     """
     if api_key is None:
         api_key = os.environ.get("OPENAI_API_KEY")
-    return dspy.LM(
-        model_name,
-        api_key=api_key,
-        temperature=temperature,
-        max_tokens=16000
-    )
+    if api_base is None:
+        api_base = os.environ.get("OPENAI_API_BASE")
+    if api_base is None:
+        return dspy.LM(
+            model_name,
+            api_key=api_key,
+            temperature=temperature,
+            max_tokens=16000
+        )
+    else:
+        return dspy.LM(
+            model_name,
+            api_key=api_key,
+            api_base=api_base,
+            temperature=temperature,
+            max_tokens=16000
+        )
 SCENARIO_EXAMPLES = [
     dspy.Example(

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/main.py RENAMED Viewed

@@ -155,13 +155,18 @@ def append_to_jsonl(record: Dict[str, Any], output_path: Path):
     default=None,
     help='API key (defaults to OPENAI_API_KEY env var)'
 )
+@click.option(
+    '--api-base',
+    default=None,
+    help='API base URL (defaults to OPENAI_API_BASE env var)'
+)
 @click.option(
     '--temperature',
     default=0.8,
     type=float,
     help='Temperature for code generation (default: 0.8)'
 )
-def main(cwes, use_top_25, min_samples, output, model, api_key, temperature):
+def main(cwes, use_top_25, min_samples, output, model, api_key, api_base, temperature):
     """Generate and evaluate vulnerable code samples for specified CWEs.
     Examples:
@@ -172,7 +177,7 @@ def main(cwes, use_top_25, min_samples, output, model, api_key, temperature):
         python -m redcodegen --use-top-25 --model openai/gpt-4o # switch model
     """
     # Configure DSPy with specified model
-    lm = create_lm(model_name=model, temperature=temperature, api_key=api_key)
+    lm = create_lm(model_name=model, temperature=temperature, api_key=api_key, api_base=api_base)
     dspy.configure(lm=lm)
     logger.info(f"Configured model: {model}")

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/scenarios.py RENAMED Viewed

@@ -29,8 +29,8 @@ class SuggestLibraries(dspy.Signature):
     task: str = dspy.InputField()
     suggested_libraries: List[str] = dspy.InputField()
-    chosen_library: Optional[str] = dspy.OutputField(desc="choose a library that would best help solve the task, or None")
-    rephrased_task: Optional[str] = dspy.OutputField(desc="rephrase the task in terms of the chosen library, or None")
+    chosen_library: str = dspy.OutputField(desc="choose a library that would best help solve the task, or say None")
+    rephrased_task: str = dspy.OutputField(desc="rephrase the task in terms of the chosen library, or say None")
 suggest_libraries = dspy.Predict(SuggestLibraries)
 def generate(cwe_id, min_scenarios=3):
@@ -53,7 +53,7 @@ def generate(cwe_id, min_scenarios=3):
     scenarios = [strip_vulnerability(scenario=i).coding_task for i in output_scenarios]
     suggestions = [suggest_libraries(task=i, suggested_libraries=CODEQL_LIBRARIES) for i in scenarios]
     results = [
-        i.rephrased_task if i.rephrased_task is not None else j
+        i.rephrased_task if ((i.rephrased_task is not None) and (i.rephrased_task.lower().strip() != "none")) else j
         for i,j in zip(suggestions, scenarios)
     ]

redcodegen-0.0.4/redcodegen/#main.py# DELETED Viewed

@@ -1,263 +0,0 @@
-"""
-main.py
-Main script for generating and evaluating vulnerable code samples
-"""
-import rich_click as click
-import jsonlines
-import logging
-import dspy
-from datetime import datetime
-from pathlib import Path
-from typing import List, Set, Dict, Any
-from cwe2.database import Database
-from redcodegen.constants import CWE_TOP_25, create_lm
-from rich.logging import RichHandler
-# Setup logging
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(message)s",
-    handlers=[RichHandler(rich_tracebacks=True)]
-)
-logger = logging.getLogger(__name__)
-def load_completed_cwes(output_path: Path) -> Set[int]:
-    """Load CWE IDs that have already been processed.
-    Args:
-        output_path: Path to the output JSONL file
-    Returns:
-        Set of CWE IDs that are already in the output file
-    """
-    completed = set()
-    if not output_path.exists():
-        return completed
-    try:
-        with jsonlines.open(output_path) as reader:
-            for record in reader:
-                if 'cwe_id' in record:
-                    completed.add(record['cwe_id'])
-        logger.info(f"Found {len(completed)} already-completed CWEs in {output_path}")
-    except Exception as e:
-        logger.warning(f"Could not read existing output file: {e}")
-    return completed
-def get_model_config() -> Dict[str, Any]:
-    """Extract model configuration from current DSPy settings.
-    Returns:
-        Dict with model configuration info
-    """
-    lm = dspy.settings.lm
-    config = {
-        "model": getattr(lm, 'model', 'unknown'),
-    }
-    return config
-def build_record(
-    cwe_id: int,
-    cwe_name: str,
-    cwe_description: str,
-    scenarios: List[str],
-    codes: List[str],
-    evaluations: List[Any],
-    errors: List[str],
-    min_scenarios: int
-) -> Dict[str, Any]:
-    """Build a record for JSONL output.
-    Args:
-        cwe_id: CWE identifier
-        cwe_name: CWE name
-        cwe_description: CWE description
-        scenarios: List of scenario descriptions
-        codes: List of generated code samples
-        evaluations: List of evaluation results (can contain None for failures)
-        errors: List of error messages (None for successful evaluations)
-        min_scenarios: Minimum scenarios parameter used
-    Returns:
-        Dict representing the complete record for this CWE
-    """
-    samples = []
-    for scenario, code, evaluation, error in zip(scenarios, codes, evaluations, errors):
-        samples.append({
-            "scenario": scenario,
-            "code": code,
-            "evaluation": evaluation
-        })
-    return {
-        "cwe_id": cwe_id,
-        "cwe_name": cwe_name,
-        "cwe_description": cwe_description,
-        "timestamp": datetime.utcnow().isoformat() + 'Z',
-        "model_config": get_model_config(),
-        "min_scenarios": min_scenarios,
-        "samples": samples
-    }
-def append_to_jsonl(record: Dict[str, Any], output_path: Path):
-    """Append a record to the JSONL file.
-    Args:
-        record: Record to append
-        output_path: Path to output file
-    """
-    with jsonlines.open(output_path, mode='a') as writer:
-        writer.write(record)
-    logger.info(f"Saved CWE-{record['cwe_id']} to {output_path}")
-@click.command()
-@click.option(
-    '--cwes', '-c',
-    multiple=True,
-    type=int,
-    help='CWE IDs to process (can specify multiple times, e.g., -c 89 -c 79)'
-)
-@click.option(
-    '--use-top-25',
-    is_flag=True,
-    help='Process all CWE Top 25'
-)
-@click.option(
-    '--min-samples', '-n',
-    default=3,
-    type=int,
-    help='Minimum samples per CWE (default: 3)'
-)
-@click.option(
-    '--output', '-o',
-    default='results.jsonl',
-    type=click.Path(),
-    help='Output JSONL file (default: results.jsonl)'
-)
-@click.option(
-    '--model', '-m',
-    default='openai/gpt-4o-mini',
-    help='Model identifier (default: openai/gpt-4o-mini)'
-)
-@click.option(
-    '--api-key',
-    default=None,
-    help='API key (defaults to OPENAI_API_KEY env var)'
-)
-@
-def main(cwes, use_top_25, min_samples, output, model, api_key):
-    """Generate and evaluate vulnerable code samples for specified CWEs.
-    Examples:
-        python -m redcodegen -c 89 -c 79 # manually specify cwe
-        python -m redcodegen -n 5 # specify number of rollouts
-        python -m redcodegen --use-top-25 # run CWE top 25
-        python -m redcodegen --use-top-25 -o results.jsonl # resume existing run
-        python -m redcodegen --use-top-25 --model openai/gpt-4o # switch model
-    """
-    # Configure DSPy with specified model
-    lm = create_lm(model_name=model, api_key=api_key)
-    dspy.configure(lm=lm)
-    logger.info(f"Configured model: {model}")
-    # Import generator and validator after configuring dspy
-    from redcodegen.generator import run_cwe
-    from redcodegen.validator import evaluate
-    output_path = Path(output)
-    # Determine which CWEs to process
-    if use_top_25:
-        cwes_to_process = CWE_TOP_25
-        logger.info(f"Processing CWE Top 25 ({len(cwes_to_process)} CWEs)")
-    elif cwes:
-        cwes_to_process = list(cwes)
-        logger.info(f"Processing {len(cwes_to_process)} specified CWEs")
-    else:
-        logger.error("Must specify either --cwes or --use-top-25")
-        raise click.UsageError("Must specify either --cwes or --use-top-25")
-    # Load already-completed CWEs for idempotency
-    completed_cwes = load_completed_cwes(output_path)
-    cwes_to_process = [cwe for cwe in cwes_to_process if cwe not in completed_cwes]
-    if not cwes_to_process:
-        logger.info("All CWEs already completed!")
-        return
-    logger.info(f"Processing {len(cwes_to_process)} CWEs (skipped {len(completed_cwes)} already completed)")
-    # Initialize CWE database
-    db = Database()
-    # Process each CWE
-    for idx, cwe_id in enumerate(cwes_to_process, 1):
-        logger.info(f"[{idx}/{len(cwes_to_process)}] Processing CWE-{cwe_id}...")
-        try:
-            # Get CWE metadata
-            entry = db.get(cwe_id)
-            cwe_name = entry.name
-            cwe_description = entry.extended_description or entry.description
-            # Generate code samples
-            logger.info(f"  Generating {min_samples} code samples...")
-            codes = run_cwe(cwe_id, min_scenarios=min_samples)
-            logger.info(f"  Generated {len(codes)} code samples")
-            # Get scenarios (need to call generate again to get scenarios)
-            from redcodegen.scenarios import generate
-            scenario_data = generate(cwe_id, min_scenarios=min_samples)
-            scenarios = scenario_data["scenarios"][:len(codes)]  # Match code count
-            # Evaluate each code sample
-            evaluations = []
-            errors = []
-            for i, code in enumerate(codes, 1):
-                logger.info(f"  Evaluating sample {i}/{len(codes)}...")
-                try:
-                    evaluation = evaluate(code)
-                    evaluations.append(evaluation)
-                    errors.append(None)
-                    logger.info(f"    Found {len(evaluation)} vulnerabilities")
-                except Exception as e:
-                    logger.warning(f"    Evaluation failed: {e}")
-                    evaluations.append(None)
-                    errors.append(str(e))
-            # Build and save record
-            record = build_record(
-                cwe_id=cwe_id,
-                cwe_name=cwe_name,
-                cwe_description=cwe_description,
-                scenarios=scenarios,
-                codes=codes,
-                evaluations=evaluations,
-                errors=errors,
-                min_scenarios=min_samples
-            )
-            append_to_jsonl(record, output_path)
-            logger.info(f"✓ Completed CWE-{cwe_id}")
-        except Exception as e:
-            logger.error(f"✗ Failed to process CWE-{cwe_id}: {e}")
-            continue
-    logger.info(f"Completed! Results saved to {output_path}")
-if __name__ == '__main__':
-    main()

redcodegen-0.0.4/redcodegen/#seeds.py# DELETED Viewed

@@ -1,17 +0,0 @@
-import dspy
-import jsonlines
-from cwe2.database import Database
-from redcodegen.constants import LM
-dspy.configure(lm=LM)
-db = Database()
-entry = db.get(502)
-print(entry.name)
-print(entry.extended_description)

{redcodegen-0.0.4 → redcodegen-0.0.5}/README.md RENAMED Viewed

File without changes

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/__init__.py RENAMED Viewed

File without changes

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/data/__init__.py RENAMED Viewed

File without changes

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/data/scenario_dow.jsonl RENAMED Viewed

File without changes

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/generator.py RENAMED Viewed

File without changes

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/seeds.py RENAMED Viewed

File without changes

{redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/validator.py RENAMED Viewed

File without changes

redcodegen 0.0.4__tar.gz → 0.0.5__tar.gz

Potentially problematic release.

redcodegen 0.0.4.tar.gz → 0.0.5.tar.gz