PyPI - codebook-lab - Versions diffs - 1.0.0__py3-none-any.whl - Mend

codebook-lab 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

codebook_lab/__init__.py +69 -0
codebook_lab/annotate.py +742 -0
codebook_lab/examples.py +87 -0
codebook_lab/experiments.py +319 -0
codebook_lab/metrics.py +1422 -0
codebook_lab/ollama.py +117 -0
codebook_lab/prompts.py +146 -0
codebook_lab/py.typed +0 -0
codebook_lab/tasks/__init__.py +1 -0
codebook_lab/tasks/policy-sentiment/codebook.json +42 -0
codebook_lab/tasks/policy-sentiment/ground-truth.csv +21 -0
codebook_lab/types.py +116 -0
codebook_lab-1.0.0.dist-info/METADATA +338 -0
codebook_lab-1.0.0.dist-info/RECORD +17 -0
codebook_lab-1.0.0.dist-info/WHEEL +5 -0
codebook_lab-1.0.0.dist-info/licenses/LICENSE +661 -0
codebook_lab-1.0.0.dist-info/top_level.txt +1 -0

codebook_lab/ollama.py ADDED Viewed

@@ -0,0 +1,117 @@
+from __future__ import annotations
+import os
+from pathlib import Path
+import shutil
+import subprocess
+import time
+from urllib import error as urllib_error
+from urllib import request as urllib_request
+def get_ollama_base_url() -> str:
+    """Return the Ollama base URL used for connectivity checks.
+
+    The value is read from the ``OLLAMA_HOST`` environment variable:
+
+    * unset or empty: the default local server is used;
+    * value with an explicit scheme (``http://...``): used as given;
+    * value without a scheme: ``http://`` is prepended and, when no port is
+      present, Ollama's default port ``11434`` is appended so that a setting
+      such as ``OLLAMA_HOST=0.0.0.0`` does not resolve to port 80.
+
+    Returns:
+        Base URL string without a trailing slash, defaulting to
+        ``http://127.0.0.1:11434``.
+    """
+    default_port = "11434"
+    raw = os.environ.get("OLLAMA_HOST", "").strip()
+    if "://" in raw:
+        # An explicit scheme is trusted as-is; only trailing slashes are dropped.
+        return raw.rstrip("/")
+    raw = raw.rstrip("/")
+    if not raw:
+        # A variable that is set but empty would otherwise yield "http://".
+        return f"http://127.0.0.1:{default_port}"
+    # Separate an optional path so the port check only looks at "host[:port]".
+    authority, slash, path = raw.partition("/")
+    _, colon, port = authority.rpartition(":")
+    # A bracketed IPv6 literal such as "[::1]" ends in "]" and therefore has
+    # no numeric tail, so it correctly receives the default port as well.
+    if not (colon and port.isdigit()):
+        authority = f"{authority}:{default_port}"
+    return f"http://{authority}{slash}{path}"
+def _can_auto_start_local_ollama(base_url: str) -> bool:
+    """Tell whether ``base_url`` is one of the default local Ollama addresses.
+
+    Only an exact match on ``http://<host>:11434`` for the loopback address,
+    ``localhost`` or ``0.0.0.0`` counts; any other host, scheme or port is
+    treated as a server CodeBook Lab should not try to launch itself.
+    """
+    local_hosts = ("127.0.0.1", "localhost", "0.0.0.0")
+    startable = frozenset(f"http://{host}:11434" for host in local_hosts)
+    return base_url in startable
+def ensure_ollama_available(
+    timeout: float = 2.0,
+    start_if_needed: bool = False,
+    startup_timeout: float = 10.0,
+) -> str:
+    """Check that the Ollama server is reachable, optionally starting it locally.
+
+    Reachability is tested with a ``GET`` on ``<base_url>/api/tags``. When the
+    server does not answer and ``start_if_needed`` is set, ``ollama serve`` is
+    launched as a detached background process and the probe is repeated until
+    it succeeds or ``startup_timeout`` elapses.
+
+    Args:
+        timeout: Timeout in seconds for each connectivity probe.
+        start_if_needed: If ``True``, try to start ``ollama serve`` when the
+            default local server is not reachable.
+        startup_timeout: Maximum seconds to wait after auto-starting the server.
+
+    Returns:
+        Base URL string for the reachable Ollama server.
+
+    Raises:
+        RuntimeError: If Ollama is not reachable and cannot be started.
+    """
+    base_url = get_ollama_base_url()
+    tags_url = f"{base_url}/api/tags"
+
+    def _probe() -> bool:
+        """Return ``True`` when the tags endpoint answers with a non-error status."""
+        try:
+            with urllib_request.urlopen(tags_url, timeout=timeout) as response:
+                return response.status < 400
+        except (urllib_error.URLError, OSError):
+            # ``URLError`` covers refused connections and HTTP error statuses.
+            # Read timeouts and connection resets can surface as plain
+            # ``OSError`` subclasses (``TimeoutError``, ``ConnectionResetError``)
+            # while a server is still starting; they mean "not ready yet".
+            return False
+
+    if _probe():
+        return base_url
+    if not start_if_needed:
+        raise RuntimeError(
+            "Ollama is not reachable. Start the local server with `ollama serve` "
+            f"and make sure it is available at {base_url}."
+        )
+    if not _can_auto_start_local_ollama(base_url):
+        raise RuntimeError(
+            "Ollama is not reachable, and CodeBook Lab only auto-starts local "
+            f"servers on the default host. Current host: {base_url}"
+        )
+    ollama_executable = shutil.which("ollama")
+    if ollama_executable is None:
+        raise RuntimeError(
+            "Ollama is not installed or not on PATH. Install Ollama first, then "
+            "either start it manually with `ollama serve` or rerun the script."
+        )
+    # Detach the server (own session, no inherited stdio) so it is not tied
+    # to this process's terminal or session.
+    subprocess.Popen(
+        [ollama_executable, "serve"],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        stdin=subprocess.DEVNULL,
+        start_new_session=True,
+        cwd=Path.cwd(),
+    )
+    # A monotonic clock keeps the wait immune to wall-clock adjustments.
+    deadline = time.monotonic() + startup_timeout
+    while True:
+        # Probe before checking the deadline so the server is tested at least
+        # once after launch, even with a zero or very small startup_timeout.
+        if _probe():
+            return base_url
+        if time.monotonic() >= deadline:
+            break
+        time.sleep(0.25)
+    raise RuntimeError(
+        "Tried to auto-start Ollama, but it still was not reachable after "
+        f"{startup_timeout:.1f} seconds at {base_url}."
+    )
+def ensure_ollama_model(model: str) -> None:
+    """Download ``model`` with ``ollama pull`` so a run can use it locally.
+
+    Args:
+        model: Ollama model identifier such as ``"gemma3:270m"``.
+
+    Raises:
+        RuntimeError: If no ``ollama`` executable is found on ``PATH``.
+        subprocess.CalledProcessError: If ``ollama pull`` exits with a
+            non-zero status.
+    """
+    binary = shutil.which("ollama")
+    if binary is not None:
+        subprocess.run([binary, "pull", model], check=True)
+        return
+    raise RuntimeError(
+        "Ollama is not installed or not on PATH, so the model cannot be pulled."
+    )

codebook_lab/prompts.py ADDED Viewed

@@ -0,0 +1,146 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Callable
+@dataclass(frozen=True)
+class PromptContext:
+    """Structured prompt-building context passed to prompt wrapper functions.
+
+    Instances are frozen, so a wrapper can read every field but cannot
+    reassign one.
+
+    Attributes:
+        section_name: Codebook section name for the current annotation.
+        section_instruction: Optional section-level instructions from the codebook.
+        annotation_name: Human-readable annotation label inside the section.
+        tooltip: Optional annotation guidance or tooltip text.
+        annotation_type: One of ``"dropdown"``, ``"checkbox"``, ``"likert"``, or ``"textbox"``.
+        options: Dropdown options when ``annotation_type`` is ``"dropdown"``, otherwise ``None``.
+        min_value: Minimum Likert value when applicable, otherwise ``None``.
+        max_value: Maximum Likert value when applicable, otherwise ``None``.
+        example: Example block extracted from the codebook, if present.
+        text: Raw source text being annotated.
+        use_examples: Whether examples should be included in the rendered prompt.
+        response_instructions: Type-specific response instructions generated by CodeBook Lab.
+        core_prompt: Prompt body assembled before the outer wrapper is applied.
+    """
+    section_name: str
+    # NOTE(review): described as optional but typed ``str``; presumably an
+    # empty string when the codebook omits it -- confirm against the caller.
+    section_instruction: str
+    annotation_name: str
+    # NOTE(review): same as above -- "optional" in the docstring, ``str`` here.
+    tooltip: str
+    annotation_type: str
+    options: list[str] | None
+    min_value: int | None
+    max_value: int | None
+    example: str
+    text: str
+    use_examples: bool
+    response_instructions: str
+    core_prompt: str
+# A prompt wrapper turns a fully populated PromptContext into the final prompt string.
+PromptWrapper = Callable[[PromptContext], str]
+# A prompt type is either the name of a registered wrapper or the wrapper callable itself.
+PromptType = str | PromptWrapper
+def _standard_wrapper(context: PromptContext) -> str:
+    """Build the default prompt: core prompt, a rule, the quoted text, a response cue."""
+    blocks = [
+        f"{context.core_prompt}",
+        "---",
+        f'Text: \n"{context.text}"',
+        "Response: \n",
+    ]
+    # Every block is separated from the next by exactly one blank line.
+    return "\n\n".join(blocks)
+def _persona_wrapper(context: PromptContext) -> str:
+    """Build the persona prompt: an expert-annotator preamble around the core prompt."""
+    persona = (
+        "You are an expert political scientist and data annotator "
+        "with extensive experience in analyzing political discourse "
+        "and parliamentary debates."
+    )
+    task = (
+        "Task: Annotate the following text using the criteria below. "
+        "Your annotation should be precise, consistent, "
+        "and based solely on the text content."
+    )
+    # Preamble first, then the same core/text/response layout as the standard wrapper.
+    return (
+        f"{persona}\n\n{task}\n\n"
+        f"{context.core_prompt}"
+        f'\n\n---\n\nText: \n"{context.text}"\n\nResponse: \n'
+    )
+def _cot_wrapper(context: PromptContext) -> str:
+    """Build the chain-of-thought prompt: core prompt, reasoning steps, text, response cue."""
+    reasoning_steps = (
+        "1. First, I'll identify key parts of the text relevant to this dimension",
+        "2. Next, I'll analyze how these elements relate to the annotation criteria",
+        "3. Then, I'll consider my assessment carefully",
+        "4. Finally, I'll make my selection based on this analysis",
+    )
+    pieces = [
+        f"{context.core_prompt}",
+        "\n\nI'll think through this step by step:\n\n",
+        # Each step sits on its own line, including the last one.
+        "".join(f"{step}\n" for step in reasoning_steps),
+        f'\n\n---\n\nText: \n"{context.text}"\n\n',
+        "Step-by-step analysis:\n\n[Think through your reasoning here]\n\n",
+        "Response: \n",
+    ]
+    return "".join(pieces)
+# Module-level registry mapping a prompt-type name to its wrapper callable.
+# It starts with the three built-in wrappers; note that the chain-of-thought
+# key is the mixed-case "CoT", so lookups are case-sensitive.
+_PROMPT_WRAPPERS: dict[str, PromptWrapper] = {
+    "standard": _standard_wrapper,
+    "persona": _persona_wrapper,
+    "CoT": _cot_wrapper,
+}
+def register_prompt_wrapper(name: str, wrapper: PromptWrapper, overwrite: bool = False) -> None:
+    """Register a prompt wrapper for use in Python and CLI experiment configs.
+
+    Args:
+        name: String key users will pass as ``prompt_type`` such as ``"concise"``.
+        wrapper: Callable accepting a :class:`PromptContext` and returning a full prompt string.
+        overwrite: Set to ``True`` to replace an existing wrapper with the same name.
+
+    Raises:
+        ValueError: If ``name`` is not a non-empty string, or if a wrapper is
+            already registered under ``name`` and ``overwrite`` is ``False``.
+        TypeError: If ``wrapper`` is not callable.
+    """
+    # Enforce what the error message promises: a truthy non-string such as
+    # ``1`` must not slip through as a registry key.
+    if not isinstance(name, str) or not name:
+        raise ValueError("Prompt wrapper name must be a non-empty string.")
+    # Reject non-callables here rather than failing later at render time with
+    # an unrelated "object is not callable" error.
+    if not callable(wrapper):
+        raise TypeError(
+            f"Prompt wrapper '{name}' must be callable, got {type(wrapper).__name__}."
+        )
+    if not overwrite and name in _PROMPT_WRAPPERS:
+        raise ValueError(f"Prompt wrapper '{name}' is already registered.")
+    _PROMPT_WRAPPERS[name] = wrapper
+def get_prompt_wrapper(name: str) -> PromptWrapper:
+    """Look up the prompt wrapper registered under ``name``.
+
+    Args:
+        name: Prompt wrapper key, for example ``"standard"`` or a custom name.
+
+    Returns:
+        The callable prompt wrapper registered under ``name``.
+
+    Raises:
+        ValueError: If nothing is registered under ``name``; the message lists
+            the registered names in sorted order.
+    """
+    try:
+        wrapper = _PROMPT_WRAPPERS[name]
+    except KeyError as missing:
+        known = ", ".join(sorted(_PROMPT_WRAPPERS.keys()))
+        message = f"Unknown prompt wrapper '{name}'. Available wrappers: {known}."
+        raise ValueError(message) from missing
+    return wrapper
+def list_prompt_wrappers() -> list[str]:
+    """List every registered prompt wrapper name in sorted order."""
+    names = list(_PROMPT_WRAPPERS)
+    names.sort()
+    return names
+def get_prompt_type_name(prompt_type: PromptType) -> str:
+    """Produce a display name for a prompt type given by name or as a callable.
+
+    Args:
+        prompt_type: Either a registered wrapper name or a callable wrapper.
+
+    Returns:
+        The string itself when ``prompt_type`` is a string; otherwise the
+        callable's ``__name__``, or ``"custom_prompt_wrapper"`` when the
+        callable has no ``__name__`` attribute.
+    """
+    if not isinstance(prompt_type, str):
+        return getattr(prompt_type, "__name__", "custom_prompt_wrapper")
+    return prompt_type
+def render_prompt(prompt_type: PromptType, context: PromptContext) -> str:
+    """Produce the final prompt for ``context`` with the requested wrapper.
+
+    Args:
+        prompt_type: Registered wrapper name or callable accepting ``PromptContext``.
+        context: Structured prompt inputs for the current annotation.
+
+    Returns:
+        A full prompt string ready to send to the model.
+    """
+    if isinstance(prompt_type, str):
+        # Names are resolved through the registry; unknown names raise there.
+        wrapper = get_prompt_wrapper(prompt_type)
+    else:
+        wrapper = prompt_type
+    return wrapper(context)

codebook_lab/py.typed ADDED Viewed

File without changes

codebook_lab/tasks/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Bundled example tasks distributed with CodeBook Lab."""

codebook_lab/tasks/policy-sentiment/codebook.json ADDED Viewed

@@ -0,0 +1,42 @@
+{
+  "header_column": "title",
+  "text_column": "text",
+  "section_1": {
+    "section_name": "Policy Sentiment",
+    "section_instruction": "Read the short text and assess whether it expresses an evaluative stance toward a public policy, proposal, or political decision.",
+    "annotations": {
+      "annotation_1": {
+        "name": "Explicit evaluation",
+        "type": "checkbox",
+        "tooltip": "Tick this box if the text clearly expresses approval, disapproval, praise, criticism, support, concern, or another evaluative stance toward the policy or decision. Leave it unticked if the text is mainly descriptive or procedural and does not clearly express a stance.",
+        "example": "Text: \n\"The mayor called the clean transport plan a practical step that will cut congestion and improve air quality.\"\n\nResponse: \n{\"response\": true}\n\n---\n\nText: \n\"The committee will hear witnesses on Tuesday and vote on amendments on Thursday.\"\n\nResponse: \n{\"response\": false}\n\n---\n\nText: \n\"The new childcare package is overdue and will make everyday life easier for working parents.\"\n\nResponse: \n{\"response\": true}"
+      },
+      "annotation_2": {
+        "name": "Direction",
+        "type": "dropdown",
+        "tooltip": "If the text expresses a stance, identify whether the overall direction is positive, negative, or mixed. Use 'no clear sentiment' only when the text does not convey an evaluative stance.",
+        "example": "Text: \n\"The childcare reform is overdue and will give working parents real support.\"\n\nResponse: \n{\"response\": \"positive\"}\n\n---\n\nText: \n\"The housing package may help first-time buyers, but its financing assumptions are unrealistic.\"\n\nResponse: \n{\"response\": \"mixed\"}\n\n---\n\nText: \n\"The proposal is wasteful and risks undermining public trust.\"\n\nResponse: \n{\"response\": \"negative\"}\n\n---\n\nText: \n\"Officials released the updated timetable for debate and implementation.\"\n\nResponse: \n{\"response\": \"no clear sentiment\"}",
+        "options": [
+          "positive",
+          "negative",
+          "mixed",
+          "no clear sentiment"
+        ]
+      },
+      "annotation_3": {
+        "name": "Intensity",
+        "type": "likert",
+        "tooltip": "Rate the overall sentiment on a 5-point scale where 1 = strongly negative, 2 = somewhat negative, 3 = mixed, neutral, or no clear sentiment, 4 = somewhat positive, and 5 = strongly positive.",
+        "example": "Text: \n\"The subsidies are wasteful and will damage market confidence.\"\n\nResponse: \n{\"response\": 1}\n\n---\n\nText: \n\"The tax credit is helpful, though it does not go far enough to solve the problem.\"\n\nResponse: \n{\"response\": 4}\n\n---\n\nText: \n\"The reform has clear advantages, but its implementation risks remain significant.\"\n\nResponse: \n{\"response\": 3}\n\n---\n\nText: \n\"The briefing note lists the agencies involved in implementation.\"\n\nResponse: \n{\"response\": 3}",
+        "min_value": 1,
+        "max_value": 5
+      },
+      "annotation_4": {
+        "name": "Evidence",
+        "type": "textbox",
+        "tooltip": "Provide a short phrase or sentence from the text that best supports your sentiment judgement. Keep the response brief.",
+        "example": "Text: \n\"The regional rail package is a sensible investment that will reconnect towns that have been neglected for decades.\"\n\nResponse: \n{\"response\": \"sensible investment that will reconnect towns\"}\n\n---\n\nText: \n\"The housing proposal may help some renters, but the funding model is fragile and unfair to local councils.\"\n\nResponse: \n{\"response\": \"funding model is fragile and unfair to local councils\"}"
+      }
+    }
+  }
+}

codebook_lab/tasks/policy-sentiment/ground-truth.csv ADDED Viewed

@@ -0,0 +1,21 @@
+doc_id,title,topic,speaker_type,text,Policy Sentiment_Explicit evaluation,Policy Sentiment_Direction,Policy Sentiment_Intensity,Policy Sentiment_Evidence
+ps_001,"Carbon Border Reform Receives Broad Support",climate,legislator,"The carbon border reform is a sensible compromise that protects domestic industry while finally aligning trade rules with climate goals.",1,positive,5,"sensible compromise"
+ps_002,"Housing Plan Faces Cost Criticism",housing,policy analyst,"The housing plan has an admirable goal, but its financing assumptions are unrealistic and the burden on municipalities is far too high.",1,negative,2,"financing assumptions are unrealistic"
+ps_003,"Committee Schedule Released",legislative process,journalist,"The committee will hear witnesses on Tuesday, debate amendments on Wednesday, and hold a final vote on Thursday.",0,no clear sentiment,3,"hear witnesses on Tuesday, debate amendments on Wednesday"
+ps_004,"Minimum Wage Proposal Wins Qualified Praise",labour,union representative,"The proposal is a step in the right direction for low-paid workers, although the exemptions remain too broad to celebrate it fully.",1,mixed,3,"a step in the right direction"
+ps_005,"Mayor Welcomes Rail Subsidy",transport,mayor,"This rail subsidy is exactly the kind of long-term investment our region needs, and it will make commuting more affordable for thousands of residents.",1,positive,5,"exactly the kind of long-term investment our region needs"
+ps_006,"Opposition MP Attacks Surveillance Bill",civil liberties,opposition MP,"The surveillance bill is intrusive, poorly drafted, and a dangerous expansion of executive power.",1,negative,1,"dangerous expansion of executive power"
+ps_007,"Analyst Notes Pension Reform Tradeoffs",welfare,policy analyst,"The pension reform may improve long-run fiscal sustainability, but it also asks future retirees to accept lower benefits.",1,mixed,3,"may improve long-run fiscal sustainability, but it also asks future retirees to accept lower benefits"
+ps_008,"Union Backs Sick Leave Expansion",public health,union representative,"Expanding sick leave is a welcome and overdue reform that gives workers basic security when they fall ill.",1,positive,5,"welcome and overdue reform"
+ps_009,"Manufacturers Criticise Plastic Ban",environment,business association,"The plastic ban moves too quickly, raises compliance costs, and gives firms too little time to adapt.",1,negative,2,"raises compliance costs"
+ps_010,"Turnout Briefing Note Released",elections,research institute,"The briefing note reports turnout rates by district and summarizes which age groups participated at the highest levels.",0,no clear sentiment,3,"reports turnout rates by district"
+ps_011,"Editorial Supports Flood Resilience Fund",climate adaptation,editorial board,"The new flood resilience fund is a constructive response to repeated climate shocks, even if implementation will need close oversight.",1,positive,4,"constructive response to repeated climate shocks"
+ps_012,"Voucher Pilot Draws Mixed Community Response",education,civil society group,"Some parents welcome the voucher pilot as a source of choice, but others worry it will deepen inequality between schools.",1,mixed,3,"welcome the voucher pilot as a source of choice, but others worry it will deepen inequality"
+ps_013,"Treasury Defends Energy Rebate",cost of living,government minister,"The energy rebate is a fair and timely measure that will shield vulnerable households from another difficult winter.",1,positive,5,"fair and timely measure"
+ps_014,"Think Tank Warns About Farm Subsidy Design",agriculture,think tank,"The subsidy may keep some farms afloat, but its design rewards inefficiency and ignores long-term environmental costs.",1,mixed,3,"rewards inefficiency and ignores long-term environmental costs"
+ps_015,"Election Commission Publishes Guidance",elections,election commission,"The commission published updated guidance on postal voting procedures and reporting deadlines for local administrators.",0,no clear sentiment,3,"published updated guidance on postal voting procedures"
+ps_016,"Mayor Questions Congestion Charge Delay",transport,mayor,"Delaying the congestion charge is a mistake that weakens the city's climate commitments and prolongs traffic problems.",1,negative,1,"is a mistake"
+ps_017,"NGO Gives Cautious Support to Water Plan",environment,civil society group,"The water plan is a meaningful improvement, although its enforcement provisions remain weaker than campaigners had hoped.",1,mixed,3,"meaningful improvement"
+ps_018,"Party Spokesperson Praises Apprenticeship Scheme",education,party spokesperson,"The apprenticeship scheme is exactly the kind of practical reform that links education policy to good jobs.",1,positive,5,"exactly the kind of practical reform"
+ps_019,"Analyst Describes Budget Rollout",fiscal policy,policy analyst,"The finance ministry will publish the budget tables on Monday and present departmental spending plans later in the week.",0,no clear sentiment,3,"will publish the budget tables on Monday"
+ps_020,"Business Group Splits on Remote Work Tax Break",labour,business association,"Some employers see the tax break as a sensible incentive, while others argue it is poorly targeted and unlikely to change behaviour.",1,mixed,3,"sensible incentive, while others argue it is poorly targeted"

codebook_lab/types.py ADDED Viewed

@@ -0,0 +1,116 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    import pandas as pd
+@dataclass
+class AnnotationRunResult:
+    """Result returned by ``run_annotation``.
+
+    Attributes:
+        model: Ollama model name used for the run, such as ``"gemma3:270m"``.
+        output_path: Filesystem path to the generated annotation CSV.
+        experiment_directory: Directory containing run metadata and outputs.
+        config: Serializable experiment configuration written to ``config.json``.
+        char_counts: Prompt and response character counts collected during the run.
+        timing_data: Inference timing summary, including total and average seconds.
+        emissions: CodeCarbon estimate in kilograms of CO2 equivalent, or ``None``.
+        dataframe: Pandas DataFrame containing the annotated rows written to disk.
+    """
+    model: str
+    output_path: Path
+    experiment_directory: Path
+    config: dict[str, Any]
+    char_counts: dict[str, Any]
+    timing_data: dict[str, Any]
+    emissions: float | None
+    # ``pd`` is imported under TYPE_CHECKING only; the annotation stays a lazy
+    # string thanks to ``from __future__ import annotations``.
+    dataframe: pd.DataFrame
+@dataclass
+class MetricsRunResult:
+    """Result returned by ``run_metrics``.
+
+    The fields typed ``... | None`` are the ones the attribute descriptions
+    mark as "if available".
+
+    Attributes:
+        output_csv: Filesystem path to the aggregate metrics CSV that was updated.
+        report_file: Filesystem path to the per-column classification report text file.
+        columns_to_compare: Annotation columns included in the evaluation.
+        metrics_by_column: Nested dictionary of computed metrics keyed by column name.
+        reports: Human-readable report text keyed by annotation column name.
+        total_inference_time: Total model inference time in seconds, if available.
+        avg_inference_time: Mean inference time per annotation request in seconds, if available.
+        input_chars: Total prompt characters sent to the model, if available.
+        output_chars: Total response characters returned by the model, if available.
+        energy_consumed: Energy consumption in kilowatt-hours, if available.
+        emissions: Emissions estimate in kilograms of CO2 equivalent, if available.
+        cpu_model: CPU metadata recorded by CodeCarbon, if available.
+        gpu_model: GPU metadata recorded by CodeCarbon, if available.
+        summary_text: Plain-text summary of the main evaluation metrics.
+    """
+    output_csv: Path
+    report_file: Path
+    columns_to_compare: list[str]
+    metrics_by_column: dict[str, dict[str, Any]]
+    reports: dict[str, str]
+    total_inference_time: float | None
+    avg_inference_time: float | None
+    input_chars: int | None
+    output_chars: int | None
+    energy_consumed: float | None
+    emissions: float | None
+    cpu_model: str | None
+    gpu_model: str | None
+    summary_text: str
+@dataclass(frozen=True)
+class ExperimentSpec:
+    """Declarative specification for one experiment run in a sweep.
+
+    Instances are frozen; only ``task`` and ``model`` are required, every
+    other field has a default.
+
+    Attributes:
+        task: Task folder name under ``tasks/``, for example ``"policy-sentiment"``.
+        model: Ollama model identifier, such as ``"gemma3:270m"``.
+        use_examples: Whether to include worked examples from the codebook.
+            Defaults to ``False``.
+        prompt_type: Registered prompt wrapper name, for example ``"standard"``
+            (the default).
+        temperature: Optional sampling temperature as ``None``, string, or float.
+        top_p: Optional nucleus-sampling value as ``None``, string, or float.
+        process_textbox: Whether textbox annotations should be generated and scored.
+            Defaults to ``False``.
+        country_iso_code: Three-letter ISO 3166-1 alpha-3 code for CodeCarbon.
+            Defaults to ``"USA"``.
+    """
+    task: str
+    model: str
+    use_examples: bool = False
+    prompt_type: str = "standard"
+    # NOTE(review): the docstring allows a string here, but the annotation is
+    # ``float | None`` -- confirm which is intended and align the two.
+    temperature: float | None = None
+    # NOTE(review): same docstring/annotation mismatch as ``temperature``.
+    top_p: float | None = None
+    process_textbox: bool = False
+    country_iso_code: str = "USA"
+@dataclass
+class ExperimentRunResult:
+    """Combined result returned by ``run_experiment``.
+
+    Bundles the specification that was run with the result objects of the
+    annotation and metrics stages.
+
+    Attributes:
+        spec: The experiment specification that was executed.
+        experiment_directory: Directory containing this run's outputs.
+        model_id: Stable model/config identifier used in the metrics log.
+        label: Task label written to the metrics CSV.
+        annotation: Result object returned by ``run_annotation``.
+        metrics: Result object returned by ``run_metrics``.
+    """
+    spec: ExperimentSpec
+    experiment_directory: Path
+    model_id: str
+    label: str
+    annotation: AnnotationRunResult
+    metrics: MetricsRunResult