codebook-lab 1.0.0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/PKG-INFO +9 -5
  2. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/README.md +8 -4
  3. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/annotate.py +143 -105
  4. codebook_lab-1.1.1/codebook_lab/conditions.py +154 -0
  5. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/metrics.py +90 -30
  6. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/PKG-INFO +9 -5
  7. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/SOURCES.txt +2 -4
  8. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/pyproject.toml +1 -1
  9. codebook_lab-1.1.1/tests/test_conditions.py +144 -0
  10. codebook_lab-1.0.0/scripts/multi_run_example.py +0 -41
  11. codebook_lab-1.0.0/scripts/single_run_example.py +0 -48
  12. codebook_lab-1.0.0/tests/__init__.py +0 -0
  13. codebook_lab-1.0.0/tests/conftest.py +0 -13
  14. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/LICENSE +0 -0
  15. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/__init__.py +0 -0
  16. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/examples.py +0 -0
  17. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/experiments.py +0 -0
  18. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/ollama.py +0 -0
  19. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/prompts.py +0 -0
  20. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/py.typed +0 -0
  21. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/tasks/__init__.py +0 -0
  22. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/tasks/policy-sentiment/codebook.json +0 -0
  23. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/tasks/policy-sentiment/ground-truth.csv +0 -0
  24. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab/types.py +0 -0
  25. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/dependency_links.txt +0 -0
  26. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/requires.txt +0 -0
  27. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/top_level.txt +0 -0
  28. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/setup.cfg +0 -0
  29. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/tests/test_examples.py +0 -0
  30. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/tests/test_experiments.py +0 -0
  31. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/tests/test_metrics_summary.py +0 -0
  32. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/tests/test_package_import.py +0 -0
  33. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/tests/test_prompts.py +0 -0
  34. {codebook_lab-1.0.0 → codebook_lab-1.1.1}/tests/test_types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codebook-lab
3
- Version: 1.0.0
3
+ Version: 1.1.1
4
4
  Summary: An LLM annotation experiment pipeline for computational social science.
5
5
  Author: Lorcan McLaren
6
6
  License-Expression: AGPL-3.0-only
@@ -45,7 +45,7 @@ Dynamic: license-file
45
45
 
46
46
  # CodeBook Lab
47
47
 
48
- [![DOI](https://zenodo.org/badge/1186234207.svg)](https://doi.org/10.5281/zenodo.19185921)
48
+ [![DOI](https://zenodo.org/badge/1186234207.svg)](https://doi.org/10.5281/zenodo.19185921) [![PyPI](https://img.shields.io/pypi/v/codebook-lab)](https://pypi.org/project/codebook-lab/) [![Python](https://img.shields.io/pypi/pyversions/codebook-lab)](https://pypi.org/project/codebook-lab/) [![License](https://img.shields.io/pypi/l/codebook-lab)](https://pypi.org/project/codebook-lab/)
49
49
 
50
50
  CodeBook Lab is an LLM annotation experiment pipeline for computational social science. It takes a codebook and labelled dataset from [CodeBook Studio](https://codebook.streamlit.app/) ([source](https://github.com/LorcanMcLaren/codebook-studio)) and runs structured experiments across the dimensions that matter for text-as-data research: model choice, model size, prompt style, zero-shot versus few-shot learning, and sampling hyperparameters — all benchmarked against human labels.
51
51
 
@@ -297,7 +297,7 @@ This project is licensed under the [GNU Affero General Public License v3.0](http
297
297
  If you use CodeBook Lab in research, please cite both:
298
298
 
299
299
  - this software package
300
- - the associated preprint
300
+ - the associated arXiv preprint
301
301
 
302
302
  Citation metadata is also available in the project's [`CITATION.cff`](https://github.com/LorcanMcLaren/codebook-lab/blob/main/CITATION.cff).
303
303
 
@@ -324,7 +324,7 @@ BibTeX:
324
324
 
325
325
  APSR style:
326
326
 
327
- McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. Preprint.
327
+ McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. arXiv preprint arXiv:2603.26898. [https://arxiv.org/abs/2603.26898](https://arxiv.org/abs/2603.26898).
328
328
 
329
329
  BibTeX:
330
330
 
@@ -333,6 +333,10 @@ BibTeX:
333
333
  author = {McLaren, Lorcan and Cross, James P. and Krakowska, Zuzanna and Rauner, Robin and Schoonvelde, Martijn},
334
334
  title = {Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation},
335
335
  year = {2026},
336
- note = {Preprint}
336
+ eprint = {2603.26898},
337
+ archivePrefix = {arXiv},
338
+ primaryClass = {cs.CL},
339
+ doi = {10.48550/arXiv.2603.26898},
340
+ url = {https://arxiv.org/abs/2603.26898}
337
341
  }
338
342
  ```
@@ -1,6 +1,6 @@
1
1
  # CodeBook Lab
2
2
 
3
- [![DOI](https://zenodo.org/badge/1186234207.svg)](https://doi.org/10.5281/zenodo.19185921)
3
+ [![DOI](https://zenodo.org/badge/1186234207.svg)](https://doi.org/10.5281/zenodo.19185921) [![PyPI](https://img.shields.io/pypi/v/codebook-lab)](https://pypi.org/project/codebook-lab/) [![Python](https://img.shields.io/pypi/pyversions/codebook-lab)](https://pypi.org/project/codebook-lab/) [![License](https://img.shields.io/pypi/l/codebook-lab)](https://pypi.org/project/codebook-lab/)
4
4
 
5
5
  CodeBook Lab is an LLM annotation experiment pipeline for computational social science. It takes a codebook and labelled dataset from [CodeBook Studio](https://codebook.streamlit.app/) ([source](https://github.com/LorcanMcLaren/codebook-studio)) and runs structured experiments across the dimensions that matter for text-as-data research: model choice, model size, prompt style, zero-shot versus few-shot learning, and sampling hyperparameters — all benchmarked against human labels.
6
6
 
@@ -252,7 +252,7 @@ This project is licensed under the [GNU Affero General Public License v3.0](http
252
252
  If you use CodeBook Lab in research, please cite both:
253
253
 
254
254
  - this software package
255
- - the associated preprint
255
+ - the associated arXiv preprint
256
256
 
257
257
  Citation metadata is also available in the project's [`CITATION.cff`](https://github.com/LorcanMcLaren/codebook-lab/blob/main/CITATION.cff).
258
258
 
@@ -279,7 +279,7 @@ BibTeX:
279
279
 
280
280
  APSR style:
281
281
 
282
- McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. Preprint.
282
+ McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. arXiv preprint arXiv:2603.26898. [https://arxiv.org/abs/2603.26898](https://arxiv.org/abs/2603.26898).
283
283
 
284
284
  BibTeX:
285
285
 
@@ -288,6 +288,10 @@ BibTeX:
288
288
  author = {McLaren, Lorcan and Cross, James P. and Krakowska, Zuzanna and Rauner, Robin and Schoonvelde, Martijn},
289
289
  title = {Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation},
290
290
  year = {2026},
291
- note = {Preprint}
291
+ eprint = {2603.26898},
292
+ archivePrefix = {arXiv},
293
+ primaryClass = {cs.CL},
294
+ doi = {10.48550/arXiv.2603.26898},
295
+ url = {https://arxiv.org/abs/2603.26898}
292
296
  }
293
297
  ```
@@ -8,9 +8,24 @@ import pandas as pd
8
8
  import regex
9
9
  from codecarbon import OfflineEmissionsTracker
10
10
  from langchain_core.prompts import ChatPromptTemplate
11
- from langchain_ollama.llms import OllamaLLM
12
-
11
+ from langchain_ollama.chat_models import ChatOllama
12
+ from pydantic import BaseModel
13
+
14
+ from .conditions import (
15
+ get_annotation_column_name,
16
+ get_annotation_entries,
17
+ is_annotation_applicable,
18
+ normalize_annotation_response_value,
19
+ )
13
20
  from .ollama import ensure_ollama_available
21
+
22
+
23
+ class AnnotationResponse(BaseModel):
24
+ """Schema used by ChatOllama structured output to guarantee valid JSON."""
25
+ response: str
26
+
27
+
28
+ _PROMPT_TEMPLATE = ChatPromptTemplate.from_template("""{question}""")
14
29
  from .prompts import PromptContext, get_prompt_type_name, render_prompt
15
30
  from .types import AnnotationRunResult
16
31
 
@@ -55,17 +70,20 @@ class _AnnotationProgressBar:
55
70
  sys.stderr.write("\n")
56
71
  sys.stderr.flush()
57
72
 
73
+ def skip(self, count: int = 1) -> None:
74
+ """Reduce the remaining work estimate when prompts are skipped."""
75
+ if count <= 0:
76
+ return
77
+ self.total_steps = max(self.completed_steps, self.total_steps - count)
78
+
58
79
 
59
80
  def _count_annotations(codebook, process_textbox=False):
60
- """Count how many annotation prompts will be issued for one row."""
81
+ """Count the maximum number of annotation prompts that could be issued for one row."""
61
82
  count = 0
62
- for key, section in codebook.items():
63
- if not key.startswith("section_"):
83
+ for _, _, _, annotation in get_annotation_entries(codebook):
84
+ if annotation.get("type") == "textbox" and not process_textbox:
64
85
  continue
65
- for annotation in section.get("annotations", {}).values():
66
- if annotation.get("type") == "textbox" and not process_textbox:
67
- continue
68
- count += 1
86
+ count += 1
69
87
  return count
70
88
 
71
89
  def load_codebook(codebook_path):
@@ -90,19 +108,10 @@ def get_annotation_column_names(codebook):
90
108
  Returns:
91
109
  List of column names in ``<section_name>_<annotation_name>`` format.
92
110
  """
93
- annotation_columns = []
94
-
95
- for key, section in codebook.items():
96
- if not key.startswith("section_"):
97
- continue
98
-
99
- section_name = section["section_name"]
100
- annotations = section.get("annotations", {})
101
-
102
- for annotation in annotations.values():
103
- annotation_columns.append(f"{section_name}_{annotation['name']}")
104
-
105
- return annotation_columns
111
+ return [
112
+ get_annotation_column_name(section_content, annotation)
113
+ for _, section_content, _, annotation in get_annotation_entries(codebook)
114
+ ]
106
115
 
107
116
  def load_input_dataframe(csv_path, codebook):
108
117
  """Load the input CSV and remove any existing annotation label columns.
@@ -161,24 +170,23 @@ def setup_model(model_name, temperature=None, top_p=None):
161
170
  top_p: Optional nucleus-sampling value.
162
171
 
163
172
  Returns:
164
- LangChain runnable that accepts ``{"question": prompt}``.
173
+ ``ChatOllama`` instance. The caller builds structured-output chains
174
+ from this model as needed.
165
175
  """
166
176
  model_kwargs = {}
167
177
  if temperature is not None:
168
178
  model_kwargs['temperature'] = float(temperature)
169
179
  if top_p is not None:
170
180
  model_kwargs['top_p'] = float(top_p)
171
-
172
- llm = OllamaLLM(model=model_name, **model_kwargs)
173
- prompt_template = ChatPromptTemplate.from_template("""{question}""")
174
- chain = prompt_template | llm
175
- return chain
181
+
182
+ llm = ChatOllama(model=model_name, **model_kwargs)
183
+ return llm
176
184
 
177
185
  def generate_response(chain, prompt, char_counts, timing_data, row_num=None, annotation_name=None):
178
186
  """Run one prompt through the model and update timing/count statistics.
179
187
 
180
188
  Args:
181
- chain: Runnable returned by :func:`setup_model`.
189
+ chain: ``ChatOllama`` instance returned by :func:`setup_model`.
182
190
  prompt: Fully rendered prompt string.
183
191
  char_counts: Mutable dict with ``input_chars`` and ``output_chars`` integers.
184
192
  timing_data: Mutable dict with inference timing counters.
@@ -191,28 +199,42 @@ def generate_response(chain, prompt, char_counts, timing_data, row_num=None, ann
191
199
  try:
192
200
  # Track input characters
193
201
  char_counts['input_chars'] += len(prompt)
194
-
202
+
195
203
  if row_num and annotation_name:
196
204
  logger.info("[Row %s] Sending request for: %s...", row_num, annotation_name)
197
205
 
206
+ structured_chain = (
207
+ _PROMPT_TEMPLATE
208
+ | chain.with_structured_output(
209
+ AnnotationResponse, method="json_schema", include_raw=True
210
+ )
211
+ )
212
+
198
213
  start_time = time.time()
199
- response = chain.invoke({"question": prompt})
214
+ result = structured_chain.invoke({"question": prompt})
200
215
  end_time = time.time()
201
216
  inference_time = end_time - start_time
202
217
  timing_data['total_inference_time'] += inference_time
203
218
  timing_data['inference_count'] += 1
204
219
 
220
+ if result.get("parsed") is not None:
221
+ response = result["parsed"].model_dump_json()
222
+ else:
223
+ raw = result.get("raw")
224
+ response = raw.content if raw else ""
225
+ logger.debug("Structured parsing failed, using raw response for %s", annotation_name)
226
+
205
227
  char_counts['output_chars'] += len(response)
206
228
 
207
229
  if row_num and annotation_name:
208
230
  logger.info("[Row %s] %s done (%.1fs)", row_num, annotation_name, inference_time)
209
-
231
+
210
232
  return response
211
233
  except Exception as e:
212
234
  logger.warning("Error generating response: %s", e)
213
235
  return ""
214
236
 
215
- def extract_json_response(response, annotation_type, min_value=None, max_value=None):
237
+ def extract_json_response(response, annotation_type, min_value=None, max_value=None, options=None):
216
238
  """
217
239
  Extract and validate JSON response based on annotation type
218
240
 
@@ -221,12 +243,22 @@ def extract_json_response(response, annotation_type, min_value=None, max_value=N
221
243
  annotation_type: Annotation type string such as ``"dropdown"`` or ``"likert"``.
222
244
  min_value: Optional integer lower bound for Likert annotations.
223
245
  max_value: Optional integer upper bound for Likert annotations.
246
+ options: Optional dropdown option list used to normalize categorical labels.
224
247
 
225
248
  Returns:
226
249
  Parsed response value coerced into the expected annotation format.
227
250
  """
228
251
  pattern = regex.compile(r'\{(?:[^{}]|(?R))*\}')
229
252
  json_strings = pattern.findall(response)
253
+
254
+ def normalize_dropdown_value(value):
255
+ return normalize_annotation_response_value(
256
+ {
257
+ "type": "dropdown",
258
+ "options": options or [],
259
+ },
260
+ value,
261
+ )
230
262
 
231
263
  for json_string in json_strings:
232
264
  try:
@@ -235,7 +267,7 @@ def extract_json_response(response, annotation_type, min_value=None, max_value=N
235
267
 
236
268
  # Validate and format based on annotation type
237
269
  if annotation_type == "dropdown":
238
- return response_value
270
+ return normalize_dropdown_value(response_value)
239
271
  elif annotation_type == "checkbox":
240
272
  # Convert to 1 or 0
241
273
  if isinstance(response_value, bool):
@@ -251,7 +283,7 @@ def extract_json_response(response, annotation_type, min_value=None, max_value=N
251
283
  return 0
252
284
  elif annotation_type == "textbox":
253
285
  # Return as string
254
- return str(response_value)
286
+ return str(response_value).strip()
255
287
  elif annotation_type == "likert":
256
288
  # Validate is within range and convert to int
257
289
  try:
@@ -266,12 +298,16 @@ def extract_json_response(response, annotation_type, min_value=None, max_value=N
266
298
  return response_value
267
299
 
268
300
  # Fallback
269
- return response_value
301
+ return str(response_value).strip() if isinstance(response_value, str) else response_value
270
302
  except json.JSONDecodeError as e:
271
303
  logger.debug("Error parsing JSON: %s", e)
272
304
 
273
305
  # If no valid JSON, try to extract direct response
274
- if annotation_type == "checkbox":
306
+ stripped_response = response.strip()
307
+
308
+ if annotation_type == "dropdown":
309
+ return normalize_dropdown_value(stripped_response)
310
+ elif annotation_type == "checkbox":
275
311
  if "yes" in response.lower() or "true" in response.lower():
276
312
  return 1
277
313
  elif "no" in response.lower() or "false" in response.lower():
@@ -288,8 +324,10 @@ def extract_json_response(response, annotation_type, min_value=None, max_value=N
288
324
  except ValueError:
289
325
  continue
290
326
  return (min_value + max_value) // 2 # Default to middle value
327
+ elif annotation_type == "textbox":
328
+ return stripped_response
291
329
 
292
- return response # Return raw response as fallback
330
+ return None
293
331
 
294
332
  def format_prompt(section_name, section_instruction, name, tooltip, annotation_type,
295
333
  options=None, min_value=None, max_value=None, example=None,
@@ -466,73 +504,73 @@ def classify_text(chain, text, codebook, prompt_type="standard", use_examples=Fa
466
504
  if timing_data is None:
467
505
  timing_data = {'total_inference_time': 0, 'inference_count': 0}
468
506
 
469
- for key, section in codebook.items():
470
- if key.startswith('section_'):
471
- section_name = section['section_name']
472
- section_instruction = section.get('section_instruction', '')
473
- annotations = section['annotations']
474
-
475
- for annotation_key, annotation in annotations.items():
476
- name = annotation['name']
477
- annotation_type = annotation['type']
478
-
479
- # Skip textbox type annotations if process_textbox is False
480
- if annotation_type == "textbox" and not process_textbox:
481
- continue
482
-
483
- tooltip = annotation.get('tooltip', '')
484
- example = annotation.get('example', '')
485
-
486
- # Get type-specific parameters
487
- options = None
488
- min_value = None
489
- max_value = None
490
-
491
- if annotation_type == "dropdown":
492
- options = annotation.get('options', [])
493
- elif annotation_type == "likert":
494
- min_value = annotation.get('min_value')
495
- max_value = annotation.get('max_value')
496
-
497
- # Format prompt based on specified type and annotation type
498
- prompt = format_prompt(
499
- section_name,
500
- section_instruction,
501
- name,
502
- tooltip,
503
- annotation_type,
504
- options,
505
- min_value,
506
- max_value,
507
- example,
508
- text,
509
- prompt_type=prompt_type,
510
- use_examples=use_examples
511
- )
512
-
513
- annotation_full_name = f"{section_name}_{name}"
514
- response_text = generate_response(
515
- chain,
516
- prompt,
517
- char_counts,
518
- timing_data,
519
- row_num=row_num,
520
- annotation_name=annotation_full_name
521
- )
522
- response_value = extract_json_response(
523
- response_text,
524
- annotation_type,
525
- min_value,
526
- max_value
527
- )
528
-
529
- if response_value is not None:
530
- # Store the response with a meaningful column name
531
- column_name = f"{section_name}_{name}"
532
- responses[column_name] = response_value
533
-
534
- if progress_bar is not None and row_num is not None and total_rows is not None:
535
- progress_bar.update(row_num, total_rows, annotation_full_name)
507
+ for section_key, section, annotation_key, annotation in get_annotation_entries(codebook):
508
+ section_name = section['section_name']
509
+ section_instruction = section.get('section_instruction', '')
510
+ name = annotation['name']
511
+ annotation_type = annotation['type']
512
+ annotation_full_name = f"{section_name}_{name}"
513
+ column_name = get_annotation_column_name(section, annotation)
514
+
515
+ if annotation_type == "textbox" and not process_textbox:
516
+ if progress_bar is not None:
517
+ progress_bar.skip()
518
+ continue
519
+
520
+ if not is_annotation_applicable(codebook, section_key, annotation_key, responses):
521
+ responses[column_name] = None
522
+ if progress_bar is not None:
523
+ progress_bar.skip()
524
+ continue
525
+
526
+ tooltip = annotation.get('tooltip', '')
527
+ example = annotation.get('example', '')
528
+
529
+ options = None
530
+ min_value = None
531
+ max_value = None
532
+
533
+ if annotation_type == "dropdown":
534
+ options = annotation.get('options', [])
535
+ elif annotation_type == "likert":
536
+ min_value = annotation.get('min_value')
537
+ max_value = annotation.get('max_value')
538
+
539
+ prompt = format_prompt(
540
+ section_name,
541
+ section_instruction,
542
+ name,
543
+ tooltip,
544
+ annotation_type,
545
+ options,
546
+ min_value,
547
+ max_value,
548
+ example,
549
+ text,
550
+ prompt_type=prompt_type,
551
+ use_examples=use_examples
552
+ )
553
+
554
+ response_text = generate_response(
555
+ chain,
556
+ prompt,
557
+ char_counts,
558
+ timing_data,
559
+ row_num=row_num,
560
+ annotation_name=annotation_full_name
561
+ )
562
+ response_value = extract_json_response(
563
+ response_text,
564
+ annotation_type,
565
+ min_value,
566
+ max_value,
567
+ options=options,
568
+ )
569
+
570
+ responses[column_name] = response_value if response_value is not None else None
571
+
572
+ if progress_bar is not None and row_num is not None and total_rows is not None:
573
+ progress_bar.update(row_num, total_rows, annotation_full_name)
536
574
 
537
575
  return responses, char_counts, timing_data
538
576
 
@@ -0,0 +1,154 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import pandas as pd
6
+
7
+
8
def get_sorted_annotation_keys(section_content: dict[str, Any]) -> list[str]:
    """Return annotation keys in the same stable order used by CodeBook Studio.

    Keys whose final ``_``-separated component is a decimal number (for
    example ``annotation_10``) come first and are ordered numerically rather
    than lexically; every other key follows in plain string order.

    Args:
        section_content: One codebook section. Its ``"annotations"`` mapping
            may be missing or ``None``, in which case no keys are returned.

    Returns:
        The section's annotation keys, sorted as described above.
    """

    def sort_key(annotation_key: str) -> tuple[int, int | str]:
        suffix = annotation_key.rsplit("_", 1)[-1]
        # ``isdecimal`` (not ``isdigit``) is the predicate that matches what
        # ``int`` can parse: ``isdigit`` also accepts characters such as a
        # superscript two, for which ``int`` raises ValueError.
        if suffix.isdecimal():
            return (0, int(suffix))
        return (1, annotation_key)

    # ``or {}`` also covers an explicit ``"annotations": null`` in the JSON.
    annotations = section_content.get("annotations") or {}
    return sorted(annotations, key=sort_key)
16
+
17
+
18
def get_annotation_column_name(section_content: dict[str, Any], annotation: dict[str, Any]) -> str:
    """Build the CSV column name ``<section_name>_<name>`` for an annotation.

    Raises:
        KeyError: If the section has no ``section_name`` or the annotation
            has no ``name``.
    """
    section_name = section_content["section_name"]
    annotation_name = annotation["name"]
    return f"{section_name}_{annotation_name}"
21
+
22
+
23
def get_annotation_entries(codebook: dict[str, Any]) -> list[tuple[str, dict[str, Any], str, dict[str, Any]]]:
    """Flatten a codebook into ``(section_key, section, annotation_key, annotation)`` tuples.

    Only top-level keys starting with ``"section_"`` are treated as sections.
    Sections are visited in the codebook's own iteration order; within a
    section, annotations follow the order given by
    :func:`get_sorted_annotation_keys`.
    """
    flattened: list[tuple[str, dict[str, Any], str, dict[str, Any]]] = []

    for section_key, section_content in codebook.items():
        if section_key.startswith("section_"):
            annotations = section_content.get("annotations", {})
            flattened.extend(
                (section_key, section_content, key, annotations.get(key, {}))
                for key in get_sorted_annotation_keys(section_content)
            )

    return flattened
35
+
36
+
37
def get_annotation_lookup(
    codebook: dict[str, Any],
) -> dict[tuple[str, str], tuple[dict[str, Any], dict[str, Any]]]:
    """Index every annotation by its ``(section_key, annotation_key)`` pair.

    Returns:
        Mapping from ``(section_key, annotation_key)`` to the matching
        ``(section_content, annotation)`` pair, built from
        :func:`get_annotation_entries`.
    """
    lookup: dict[tuple[str, str], tuple[dict[str, Any], dict[str, Any]]] = {}
    for section_key, section_content, annotation_key, annotation in get_annotation_entries(codebook):
        lookup[(section_key, annotation_key)] = (section_content, annotation)
    return lookup
45
+
46
+
47
def get_annotation_condition(annotation: dict[str, Any]) -> dict[str, Any] | None:
    """Extract an annotation's ``condition`` block in a fixed three-key form.

    Returns:
        A dict with exactly the keys ``section_key``, ``annotation_key`` and
        ``value`` (``None`` when the condition has no value), or ``None``
        when the annotation has no dict-valued ``condition`` or the condition
        lacks a truthy ``section_key`` or ``annotation_key``.
    """
    raw_condition = annotation.get("condition")
    if isinstance(raw_condition, dict):
        target_section = raw_condition.get("section_key")
        target_annotation = raw_condition.get("annotation_key")
        if target_section and target_annotation:
            return {
                "section_key": target_section,
                "annotation_key": target_annotation,
                "value": raw_condition.get("value"),
            }
    return None
63
+
64
+
65
def normalize_annotation_response_value(annotation: dict[str, Any], value: Any) -> Any:
    """Coerce a stored or model-produced response into a stable comparable value.

    Args:
        annotation: Annotation metadata. ``type`` (default ``"dropdown"``)
            selects the coercion rule; ``options`` is used for dropdown
            matching.
        value: Raw response value.

    Returns:
        ``None`` for missing scalars (``None``, NaN, ``pd.NA``). Otherwise:

        * dropdown: the stripped text (surrounding backticks removed), or
          ``None`` when it is empty. When ``options`` are given, the matching
          option (compared case-insensitively) or ``None`` if nothing matches.
        * checkbox: ``1`` / ``0`` for ``1/true/yes`` / ``0/false/no``
          (case-insensitive); any other value is returned unchanged.
        * likert: ``int(value)``, or the value unchanged if it cannot be
          converted.
        * textbox and unknown types: ``str(value).strip()``.
    """
    # ``pd.isna`` is element-wise for list-likes, so using its result in an
    # ``if`` raises "truth value of an array is ambiguous" for e.g. a list
    # returned by the model. Only scalars can be "missing" here.
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return None

    annotation_type = annotation.get("type", "dropdown")

    if annotation_type == "dropdown":
        # Models sometimes wrap the label in Markdown backticks.
        normalized = str(value).strip().strip("`").strip()
        if normalized == "":
            return None

        options = annotation.get("options") or []
        if not options:
            return normalized

        option_lookup = {str(option).strip().casefold(): option for option in options}
        return option_lookup.get(normalized.casefold())

    if annotation_type == "checkbox":
        lowered = str(value).strip().lower()
        if lowered in {"1", "true", "yes"}:
            return 1
        if lowered in {"0", "false", "no"}:
            return 0
        return value

    if annotation_type == "likert":
        try:
            return int(value)
        except (TypeError, ValueError):
            return value

    # Textbox and any unrecognized type are compared as stripped text.
    return str(value).strip()
101
+
102
+
103
def is_annotation_applicable(
    codebook: dict[str, Any],
    section_key: str,
    annotation_key: str,
    response_values: dict[str, Any],
    lookup: dict[tuple[str, str], tuple[dict[str, Any], dict[str, Any]]] | None = None,
    visited: set[tuple[str, str]] | None = None,
) -> bool:
    """Return whether an annotation should be shown or generated for the current responses.

    An annotation with a ``condition`` is applicable only when the annotation
    it depends on is itself applicable and its recorded response equals the
    condition's expected value (both normalized for the target's type).

    Args:
        codebook: Full codebook; used to build ``lookup`` when none is given.
        section_key: Section key of the annotation being checked.
        annotation_key: Annotation key of the annotation being checked.
        response_values: Responses recorded so far, keyed by column name.
        lookup: Optional prebuilt result of :func:`get_annotation_lookup`.
        visited: Keys already on the current dependency chain.

    Returns:
        ``True`` when the annotation is unknown, unconditional, depends on
        itself, depends on an unknown annotation, or is already on the
        dependency chain; otherwise the outcome of the comparison above.
    """
    if not lookup:
        lookup = get_annotation_lookup(codebook)

    own_key = (section_key, annotation_key)
    own_entry = lookup.get(own_key)
    if not own_entry:
        return True

    condition = get_annotation_condition(own_entry[1])
    if not condition:
        return True

    parent_key = (condition["section_key"], condition["annotation_key"])
    parent_entry = lookup.get(parent_key)
    # A self-reference or a dangling reference cannot be evaluated.
    if parent_key == own_key or not parent_entry:
        return True

    chain = visited if visited else set()
    if own_key in chain:
        # Dependency cycle: stop recursing.
        return True

    parent_section, parent_annotation = parent_entry
    parent_is_applicable = is_annotation_applicable(
        codebook,
        condition["section_key"],
        condition["annotation_key"],
        response_values,
        lookup=lookup,
        visited=chain | {own_key},
    )
    if not parent_is_applicable:
        return False

    parent_column = get_annotation_column_name(parent_section, parent_annotation)
    actual_value = normalize_annotation_response_value(parent_annotation, response_values.get(parent_column))
    expected_value = normalize_annotation_response_value(parent_annotation, condition.get("value"))

    if actual_value is None:
        return False
    if parent_annotation.get("type") == "textbox" and actual_value == "":
        return False

    return actual_value == expected_value
@@ -16,6 +16,13 @@ import krippendorff
16
16
  from scipy.stats import spearmanr
17
17
  from sklearn.metrics import confusion_matrix
18
18
 
19
+ from .conditions import (
20
+ get_annotation_column_name,
21
+ get_annotation_condition,
22
+ get_annotation_entries,
23
+ get_annotation_lookup,
24
+ normalize_annotation_response_value,
25
+ )
19
26
  from .types import MetricsRunResult
20
27
 
21
28
  logger = logging.getLogger(__name__)
@@ -82,38 +89,90 @@ def extract_column_info_from_codebook(codebook_path):
82
89
  """
83
90
  with open(codebook_path, 'r') as file:
84
91
  codebook = json.load(file)
85
-
92
+
93
+ lookup = get_annotation_lookup(codebook)
86
94
  column_info = {}
87
-
88
- for key, section in codebook.items():
89
- if key.startswith('section_'):
90
- section_name = section['section_name']
91
- annotations = section['annotations']
92
-
93
- for annotation_key, annotation in annotations.items():
94
- name = annotation['name']
95
-
96
- column_name = f"{section_name}_{name}"
97
-
98
- # Extract annotation type and relevant properties
99
- annotation_type = annotation.get('type', 'dropdown') # Default to dropdown for backward compatibility
100
-
101
- properties = {
102
- 'type': annotation_type
95
+
96
+ for section_key, section, annotation_key, annotation in get_annotation_entries(codebook):
97
+ column_name = get_annotation_column_name(section, annotation)
98
+ annotation_type = annotation.get('type', 'dropdown')
99
+
100
+ properties = {
101
+ 'type': annotation_type,
102
+ 'section_key': section_key,
103
+ 'annotation_key': annotation_key,
104
+ }
105
+
106
+ if annotation_type == 'dropdown':
107
+ properties['options'] = annotation.get('options', [])
108
+ elif annotation_type == 'likert':
109
+ properties['min_value'] = annotation.get('min_value', 0)
110
+ properties['max_value'] = annotation.get('max_value', 5)
111
+
112
+ condition = get_annotation_condition(annotation)
113
+ if condition:
114
+ source_entry = lookup.get((condition['section_key'], condition['annotation_key']))
115
+ if source_entry:
116
+ source_section, source_annotation = source_entry
117
+ properties['condition'] = {
118
+ 'source_column': get_annotation_column_name(source_section, source_annotation),
119
+ 'source_type': source_annotation.get('type', 'dropdown'),
120
+ 'value': normalize_annotation_response_value(source_annotation, condition.get('value')),
103
121
  }
104
-
105
- # Add type-specific properties
106
- if annotation_type == 'dropdown':
107
- properties['options'] = annotation.get('options', [])
108
- elif annotation_type == 'likert':
109
- properties['min_value'] = annotation.get('min_value', 0)
110
- properties['max_value'] = annotation.get('max_value', 5)
111
-
112
- column_info[column_name] = properties
122
+
123
+ column_info[column_name] = properties
113
124
 
114
125
  logger.debug("Extracted column info from codebook: %s", column_info)
115
126
  return column_info
116
127
 
128
+
129
def _is_row_applicable_for_column(merged_row, column, column_info, side="gt", visited=None):
    """Return whether a conditional annotation is applicable for one merged row.

    Args:
        merged_row: Row of the merged frame; values are read with
            ``merged_row.get("<source_column>_<side>")``.
        column: Annotation column being evaluated.
        column_info: Per-column metadata; a ``condition`` entry holds
            ``source_column``, ``source_type`` and the expected ``value``.
        side: Column suffix whose values drive the check (default ``"gt"``).
        visited: Columns already on the current dependency chain.

    Returns:
        ``True`` when the column has no usable condition or is already on the
        dependency chain. ``False`` when its source column is itself not
        applicable or has no value. Otherwise whether the normalized source
        value equals the normalized expected value.
    """
    condition = column_info.get(column, {}).get("condition")
    if not condition:
        return True

    source_column = condition.get("source_column")
    if not source_column:
        return True

    visited = visited or set()
    if column in visited:
        # Dependency cycle: stop recursing.
        return True

    if source_column in column_info and not _is_row_applicable_for_column(
        merged_row,
        source_column,
        column_info,
        side=side,
        visited=visited | {column},
    ):
        return False

    # Normalize against the source column's dropdown options when they are
    # known, so a cell such as "yes" matches the option "Yes". The expected
    # value stored in the condition was canonicalized with those options, so
    # normalizing the cell without them could never match on case or type.
    source_annotation = {"type": condition.get("source_type", "dropdown")}
    source_options = column_info.get(source_column, {}).get("options")
    if source_options:
        source_annotation["options"] = source_options

    source_value = merged_row.get(f"{source_column}_{side}")
    actual_value = normalize_annotation_response_value(source_annotation, source_value)
    expected_value = normalize_annotation_response_value(source_annotation, condition.get("value"))

    if actual_value is None:
        return False
    if condition.get("source_type") == "textbox" and actual_value == "":
        return False

    return actual_value == expected_value
164
+
165
+
166
+ def _get_applicable_row_mask(merged_df, column, column_info, side="gt"):
167
+ """Return a boolean mask for rows where an annotation is applicable."""
168
+ if "condition" not in column_info.get(column, {}):
169
+ return pd.Series(True, index=merged_df.index)
170
+
171
+ return merged_df.apply(
172
+ lambda row: _is_row_applicable_for_column(row, column, column_info, side=side),
173
+ axis=1,
174
+ )
175
+
117
176
  def load_data(ground_truth_path, llm_output_path, columns_to_compare):
118
177
  """Load and align ground-truth and model-output CSV files for evaluation.
119
178
 
@@ -413,8 +472,9 @@ def evaluate_performance(merged_df, columns_to_compare, column_info, process_tex
413
472
  reports[column] = "Textbox processing skipped."
414
473
  continue
415
474
 
416
- y_true = merged_df[column_gt]
417
- y_pred = merged_df[column_llm]
475
+ applicable_mask = _get_applicable_row_mask(merged_df, column, column_info, side="gt")
476
+ y_true = merged_df.loc[applicable_mask, column_gt]
477
+ y_pred = merged_df.loc[applicable_mask, column_llm]
418
478
 
419
479
  # Handle values based on annotation type
420
480
  if annotation_type == 'checkbox':
@@ -548,8 +608,8 @@ def evaluate_performance(merged_df, columns_to_compare, column_info, process_tex
548
608
 
549
609
  # For Krippendorff's alpha
550
610
  label_to_int = {label: i for i, label in enumerate(['missing'] + all_labels)}
551
- y_true_encoded = np.array([label_to_int[y_true_clean[i]] for i in range(len(y_true_clean))])
552
- y_pred_encoded = np.array([label_to_int[y_pred_clean[i]] for i in range(len(y_pred_clean))])
611
+ y_true_encoded = np.array([label_to_int[value] for value in y_true_clean.tolist()])
612
+ y_pred_encoded = np.array([label_to_int[value] for value in y_pred_clean.tolist()])
553
613
  data = np.array([y_true_encoded, y_pred_encoded])
554
614
  krippendorff_alpha_scores[column] = krippendorff.alpha(reliability_data=data)
555
615
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codebook-lab
3
- Version: 1.0.0
3
+ Version: 1.1.1
4
4
  Summary: An LLM annotation experiment pipeline for computational social science.
5
5
  Author: Lorcan McLaren
6
6
  License-Expression: AGPL-3.0-only
@@ -45,7 +45,7 @@ Dynamic: license-file
45
45
 
46
46
  # CodeBook Lab
47
47
 
48
- [![DOI](https://zenodo.org/badge/1186234207.svg)](https://doi.org/10.5281/zenodo.19185921)
48
+ [![DOI](https://zenodo.org/badge/1186234207.svg)](https://doi.org/10.5281/zenodo.19185921) [![PyPI](https://img.shields.io/pypi/v/codebook-lab)](https://pypi.org/project/codebook-lab/) [![Python](https://img.shields.io/pypi/pyversions/codebook-lab)](https://pypi.org/project/codebook-lab/) [![License](https://img.shields.io/pypi/l/codebook-lab)](https://pypi.org/project/codebook-lab/)
49
49
 
50
50
  CodeBook Lab is an LLM annotation experiment pipeline for computational social science. It takes a codebook and labelled dataset from [CodeBook Studio](https://codebook.streamlit.app/) ([source](https://github.com/LorcanMcLaren/codebook-studio)) and runs structured experiments across the dimensions that matter for text-as-data research: model choice, model size, prompt style, zero-shot versus few-shot learning, and sampling hyperparameters — all benchmarked against human labels.
51
51
 
@@ -297,7 +297,7 @@ This project is licensed under the [GNU Affero General Public License v3.0](http
297
297
  If you use CodeBook Lab in research, please cite both:
298
298
 
299
299
  - this software package
300
- - the associated preprint
300
+ - the associated arXiv preprint
301
301
 
302
302
  Citation metadata is also available in the project's [`CITATION.cff`](https://github.com/LorcanMcLaren/codebook-lab/blob/main/CITATION.cff).
303
303
 
@@ -324,7 +324,7 @@ BibTeX:
324
324
 
325
325
  APSR style:
326
326
 
327
- McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. Preprint.
327
+ McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. arXiv preprint arXiv:2603.26898. [https://arxiv.org/abs/2603.26898](https://arxiv.org/abs/2603.26898).
328
328
 
329
329
  BibTeX:
330
330
 
@@ -333,6 +333,10 @@ BibTeX:
333
333
  author = {McLaren, Lorcan and Cross, James P. and Krakowska, Zuzanna and Rauner, Robin and Schoonvelde, Martijn},
334
334
  title = {Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation},
335
335
  year = {2026},
336
- note = {Preprint}
336
+ eprint = {2603.26898},
337
+ archivePrefix = {arXiv},
338
+ primaryClass = {cs.CL},
339
+ doi = {10.48550/arXiv.2603.26898},
340
+ url = {https://arxiv.org/abs/2603.26898}
337
341
  }
338
342
  ```
@@ -3,6 +3,7 @@ README.md
3
3
  pyproject.toml
4
4
  codebook_lab/__init__.py
5
5
  codebook_lab/annotate.py
6
+ codebook_lab/conditions.py
6
7
  codebook_lab/examples.py
7
8
  codebook_lab/experiments.py
8
9
  codebook_lab/metrics.py
@@ -18,10 +19,7 @@ codebook_lab.egg-info/top_level.txt
18
19
  codebook_lab/tasks/__init__.py
19
20
  codebook_lab/tasks/policy-sentiment/codebook.json
20
21
  codebook_lab/tasks/policy-sentiment/ground-truth.csv
21
- scripts/multi_run_example.py
22
- scripts/single_run_example.py
23
- tests/__init__.py
24
- tests/conftest.py
22
+ tests/test_conditions.py
25
23
  tests/test_examples.py
26
24
  tests/test_experiments.py
27
25
  tests/test_metrics_summary.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codebook-lab"
7
- version = "1.0.0"
7
+ version = "1.1.1"
8
8
  description = "An LLM annotation experiment pipeline for computational social science."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,144 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+
5
+ import pandas as pd
6
+
7
+ from codebook_lab.annotate import classify_text, extract_json_response
8
+ from codebook_lab.metrics import evaluate_performance, extract_column_info_from_codebook
9
+
10
+
11
def _conditional_codebook() -> dict:
    """Build a two-section codebook whose second question is conditional.

    Section 1 holds the ``is_relevant`` dropdown. Section 2 holds the
    ``stance`` dropdown, whose ``condition`` points at section 1 /
    annotation 1 with the value ``"Yes"``.
    """
    relevance_question = {
        "name": "is_relevant",
        "type": "dropdown",
        "tooltip": "",
        "options": ["Yes", "No"],
    }
    stance_question = {
        "name": "stance",
        "type": "dropdown",
        "tooltip": "",
        "options": ["Positive", "Negative"],
        "condition": {
            "section_key": "section_1",
            "annotation_key": "annotation_1",
            "value": "Yes",
        },
    }

    # Keys are inserted in the same order as a single literal would give.
    codebook = {"header_column": "id", "text_column": "text"}
    codebook["section_1"] = {
        "section_name": "1. Relevance",
        "section_instruction": "",
        "annotations": {"annotation_1": relevance_question},
    }
    codebook["section_2"] = {
        "section_name": "2. Stance",
        "section_instruction": "",
        "annotations": {"annotation_1": stance_question},
    }
    return codebook
45
+
46
+
47
def test_classify_text_skips_inactive_conditional_annotations(monkeypatch):
    """A "No" on the gating question must leave the dependent question unasked."""
    requested_annotations: list[str] = []
    # Only one canned reply exists, so a second model call would raise
    # StopIteration and fail the test.
    canned_replies = iter(['{"response": "No"}'])

    def fake_generate_response(*args, **kwargs):
        requested_annotations.append(kwargs.get("annotation_name", ""))
        return next(canned_replies)

    monkeypatch.setattr("codebook_lab.annotate.generate_response", fake_generate_response)

    call_kwargs = {
        "chain": object(),
        "text": "Example text",
        "codebook": _conditional_codebook(),
        "prompt_type": "standard",
        "use_examples": False,
    }
    result, _, _ = classify_text(**call_kwargs)

    gate_key = "1. Relevance_is_relevant"
    dependent_key = "2. Stance_stance"
    assert requested_annotations == [gate_key]
    assert result[gate_key] == "No"
    # The skipped annotation is still reported, with a null value.
    assert dependent_key in result
    assert result[dependent_key] is None
74
+
75
+
76
def test_metrics_ignore_non_applicable_conditional_rows(tmp_path):
    """Rows whose ground-truth gate is "No" must not count towards stance scores."""
    codebook_path = tmp_path / "codebook.json"
    codebook_path.write_text(json.dumps(_conditional_codebook()))
    column_info = extract_column_info_from_codebook(codebook_path)

    # Row 0: gate is "No" in the ground truth, so stance is not applicable.
    # Row 1: gate is "Yes" in the ground truth, and the model gave no stance.
    frame_columns = {
        "1. Relevance_is_relevant_gt": ["No", "Yes"],
        "1. Relevance_is_relevant_llm": ["No", "No"],
        "2. Stance_stance_gt": [None, "Positive"],
        "2. Stance_stance_llm": [None, None],
    }
    merged_df = pd.DataFrame(frame_columns)

    stance_column = "2. Stance_stance"
    metrics = evaluate_performance(
        merged_df=merged_df,
        columns_to_compare=[stance_column],
        column_info=column_info,
        process_textbox=False,
    )

    accuracy_by_column = metrics[0]
    agreement_by_column = metrics[6]

    assert accuracy_by_column[stance_column] == 0.0
    assert agreement_by_column[stance_column] == 0.0
103
+
104
+
105
def test_extract_json_response_normalizes_dropdown_options():
    """Dropdown replies map onto the declared options; anything else becomes None."""
    choices = ["Yes", "No"]

    # Option-like replies, JSON-wrapped and bare.
    wrapped_match = extract_json_response(
        '{"response": " yes\\n"}', "dropdown", options=choices
    )
    assert wrapped_match == "Yes"

    bare_match = extract_json_response(" No\n", "dropdown", options=choices)
    assert bare_match == "No"

    # Replies that are not among the options, JSON-wrapped and bare.
    wrapped_miss = extract_json_response(
        '{"response": "JSON"}', "dropdown", options=choices
    )
    assert wrapped_miss is None

    bare_miss = extract_json_response("JSON\n", "dropdown", options=choices)
    assert bare_miss is None
120
+
121
+
122
def test_classify_text_stores_none_for_invalid_dropdown_outputs(monkeypatch):
    """An invalid gate reply is stored as None, and so is the dependent answer."""
    # A single canned reply: any further model call raises StopIteration.
    canned_replies = iter(["JSON\n"])

    def fake_generate_response(*args, **kwargs):
        return next(canned_replies)

    monkeypatch.setattr("codebook_lab.annotate.generate_response", fake_generate_response)

    call_kwargs = {
        "chain": object(),
        "text": "Example text",
        "codebook": _conditional_codebook(),
        "prompt_type": "standard",
        "use_examples": False,
    }
    result, _, _ = classify_text(**call_kwargs)

    assert result["1. Relevance_is_relevant"] is None
    assert result["2. Stance_stance"] is None
@@ -1,41 +0,0 @@
1
- """Run a small multi-experiment sweep with CodeBook Lab.
2
-
3
- This script is intentionally small so users can test the package quickly.
4
- Edit the grid below to explore more combinations once the basic workflow is
5
- working in your environment. The package will try to start a local Ollama
6
- server if needed and will pull any missing models automatically.
7
- """
8
-
9
- from pathlib import Path
10
-
11
- from codebook_lab import run_experiment_grid
12
-
13
-
14
- OUTPUT_ROOT = Path("outputs")
15
-
16
- PARAM_GRID = {
17
- "country_iso_code": "IRL",
18
- "tasks": ["policy-sentiment"],
19
- "models": ["gemma3:270m"],
20
- "use_examples": [False, True],
21
- "prompt_types": ["standard"],
22
- "temperatures": [None],
23
- "top_ps": [None],
24
- "process_textboxes": [True],
25
- }
26
-
27
-
28
- def main() -> None:
29
- """Run a small sweep and print a short summary of the completed runs."""
30
- results = run_experiment_grid(
31
- param_grid=PARAM_GRID,
32
- output_root=OUTPUT_ROOT,
33
- )
34
-
35
- print(f"Completed {len(results)} experiment runs.")
36
- for result in results:
37
- print(f"- {result.model_id}: {result.experiment_directory}")
38
-
39
-
40
- if __name__ == "__main__":
41
- main()
@@ -1,48 +0,0 @@
1
- """Run one bundled-example experiment with CodeBook Lab.
2
-
3
- Edit the constants below if you want to change the model, task, or output
4
- location. This script assumes:
5
-
6
- 1. CodeBook Lab has been installed in the current environment, for example
7
- with ``python -m pip install codebook-lab``.
8
- 2. Ollama is installed and available on PATH.
9
-
10
- The package will try to start a local Ollama server if needed and will pull the
11
- requested model automatically before running the experiment.
12
- """
13
-
14
- from pathlib import Path
15
-
16
- from codebook_lab import ExperimentSpec, run_experiment
17
-
18
-
19
- TASK = "policy-sentiment"
20
- MODEL = "gemma3:270m"
21
- COUNTRY_ISO_CODE = "IRL"
22
- OUTPUT_ROOT = Path("outputs")
23
-
24
-
25
- def main() -> None:
26
- """Run a single experiment and print the key output locations."""
27
- result = run_experiment(
28
- ExperimentSpec(
29
- task=TASK,
30
- model=MODEL,
31
- use_examples=False,
32
- prompt_type="standard",
33
- temperature=None,
34
- top_p=None,
35
- process_textbox=True,
36
- country_iso_code=COUNTRY_ISO_CODE,
37
- ),
38
- output_root=OUTPUT_ROOT,
39
- )
40
-
41
- print("Completed single experiment run.")
42
- print(f"Experiment directory: {result.experiment_directory}")
43
- print(f"Metrics CSV: {result.metrics.output_csv}")
44
- print(f"Classification report: {result.metrics.report_file}")
45
-
46
-
47
- if __name__ == "__main__":
48
- main()
File without changes
@@ -1,13 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from pathlib import Path
4
-
5
- import pytest
6
-
7
-
8
- @pytest.fixture()
9
- def bundled_task_dir() -> Path:
10
- """Return the path to the bundled policy-sentiment example task."""
11
- task_dir = Path(__file__).resolve().parent.parent / "codebook_lab" / "tasks" / "policy-sentiment"
12
- assert task_dir.exists(), f"Bundled task directory not found: {task_dir}"
13
- return task_dir
File without changes
File without changes