codebook-lab 1.1.0__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/PKG-INFO +9 -5
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/README.md +8 -4
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/annotate.py +3 -1
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/PKG-INFO +9 -5
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/SOURCES.txt +0 -4
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/pyproject.toml +1 -1
- codebook_lab-1.1.0/scripts/multi_run_example.py +0 -41
- codebook_lab-1.1.0/scripts/single_run_example.py +0 -48
- codebook_lab-1.1.0/tests/__init__.py +0 -0
- codebook_lab-1.1.0/tests/conftest.py +0 -13
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/LICENSE +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/__init__.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/conditions.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/examples.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/experiments.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/metrics.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/ollama.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/prompts.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/py.typed +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/tasks/__init__.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/tasks/policy-sentiment/codebook.json +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/tasks/policy-sentiment/ground-truth.csv +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/types.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/dependency_links.txt +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/requires.txt +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab.egg-info/top_level.txt +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/setup.cfg +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/tests/test_conditions.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/tests/test_examples.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/tests/test_experiments.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/tests/test_metrics_summary.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/tests/test_package_import.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/tests/test_prompts.py +0 -0
- {codebook_lab-1.1.0 → codebook_lab-1.1.1}/tests/test_types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codebook-lab
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: An LLM annotation experiment pipeline for computational social science.
|
|
5
5
|
Author: Lorcan McLaren
|
|
6
6
|
License-Expression: AGPL-3.0-only
|
|
@@ -45,7 +45,7 @@ Dynamic: license-file
|
|
|
45
45
|
|
|
46
46
|
# CodeBook Lab
|
|
47
47
|
|
|
48
|
-
[](https://doi.org/10.5281/zenodo.19185921)
|
|
48
|
+
[](https://doi.org/10.5281/zenodo.19185921) [](https://pypi.org/project/codebook-lab/) [](https://pypi.org/project/codebook-lab/) [](https://pypi.org/project/codebook-lab/)
|
|
49
49
|
|
|
50
50
|
CodeBook Lab is an LLM annotation experiment pipeline for computational social science. It takes a codebook and labelled dataset from [CodeBook Studio](https://codebook.streamlit.app/) ([source](https://github.com/LorcanMcLaren/codebook-studio)) and runs structured experiments across the dimensions that matter for text-as-data research: model choice, model size, prompt style, zero-shot versus few-shot learning, and sampling hyperparameters — all benchmarked against human labels.
|
|
51
51
|
|
|
@@ -297,7 +297,7 @@ This project is licensed under the [GNU Affero General Public License v3.0](http
|
|
|
297
297
|
If you use CodeBook Lab in research, please cite both:
|
|
298
298
|
|
|
299
299
|
- this software package
|
|
300
|
-
- the associated preprint
|
|
300
|
+
- the associated arXiv preprint
|
|
301
301
|
|
|
302
302
|
Citation metadata is also available in the project's [`CITATION.cff`](https://github.com/LorcanMcLaren/codebook-lab/blob/main/CITATION.cff).
|
|
303
303
|
|
|
@@ -324,7 +324,7 @@ BibTeX:
|
|
|
324
324
|
|
|
325
325
|
APSR style:
|
|
326
326
|
|
|
327
|
-
McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*.
|
|
327
|
+
McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. arXiv preprint arXiv:2603.26898. [https://arxiv.org/abs/2603.26898](https://arxiv.org/abs/2603.26898).
|
|
328
328
|
|
|
329
329
|
BibTeX:
|
|
330
330
|
|
|
@@ -333,6 +333,10 @@ BibTeX:
|
|
|
333
333
|
author = {McLaren, Lorcan and Cross, James P. and Krakowska, Zuzanna and Rauner, Robin and Schoonvelde, Martijn},
|
|
334
334
|
title = {Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation},
|
|
335
335
|
year = {2026},
|
|
336
|
-
|
|
336
|
+
eprint = {2603.26898},
|
|
337
|
+
archivePrefix = {arXiv},
|
|
338
|
+
primaryClass = {cs.CL},
|
|
339
|
+
doi = {10.48550/arXiv.2603.26898},
|
|
340
|
+
url = {https://arxiv.org/abs/2603.26898}
|
|
337
341
|
}
|
|
338
342
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# CodeBook Lab
|
|
2
2
|
|
|
3
|
-
[](https://doi.org/10.5281/zenodo.19185921)
|
|
3
|
+
[](https://doi.org/10.5281/zenodo.19185921) [](https://pypi.org/project/codebook-lab/) [](https://pypi.org/project/codebook-lab/) [](https://pypi.org/project/codebook-lab/)
|
|
4
4
|
|
|
5
5
|
CodeBook Lab is an LLM annotation experiment pipeline for computational social science. It takes a codebook and labelled dataset from [CodeBook Studio](https://codebook.streamlit.app/) ([source](https://github.com/LorcanMcLaren/codebook-studio)) and runs structured experiments across the dimensions that matter for text-as-data research: model choice, model size, prompt style, zero-shot versus few-shot learning, and sampling hyperparameters — all benchmarked against human labels.
|
|
6
6
|
|
|
@@ -252,7 +252,7 @@ This project is licensed under the [GNU Affero General Public License v3.0](http
|
|
|
252
252
|
If you use CodeBook Lab in research, please cite both:
|
|
253
253
|
|
|
254
254
|
- this software package
|
|
255
|
-
- the associated preprint
|
|
255
|
+
- the associated arXiv preprint
|
|
256
256
|
|
|
257
257
|
Citation metadata is also available in the project's [`CITATION.cff`](https://github.com/LorcanMcLaren/codebook-lab/blob/main/CITATION.cff).
|
|
258
258
|
|
|
@@ -279,7 +279,7 @@ BibTeX:
|
|
|
279
279
|
|
|
280
280
|
APSR style:
|
|
281
281
|
|
|
282
|
-
McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*.
|
|
282
|
+
McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. arXiv preprint arXiv:2603.26898. [https://arxiv.org/abs/2603.26898](https://arxiv.org/abs/2603.26898).
|
|
283
283
|
|
|
284
284
|
BibTeX:
|
|
285
285
|
|
|
@@ -288,6 +288,10 @@ BibTeX:
|
|
|
288
288
|
author = {McLaren, Lorcan and Cross, James P. and Krakowska, Zuzanna and Rauner, Robin and Schoonvelde, Martijn},
|
|
289
289
|
title = {Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation},
|
|
290
290
|
year = {2026},
|
|
291
|
-
|
|
291
|
+
eprint = {2603.26898},
|
|
292
|
+
archivePrefix = {arXiv},
|
|
293
|
+
primaryClass = {cs.CL},
|
|
294
|
+
doi = {10.48550/arXiv.2603.26898},
|
|
295
|
+
url = {https://arxiv.org/abs/2603.26898}
|
|
292
296
|
}
|
|
293
297
|
```
|
|
@@ -205,7 +205,9 @@ def generate_response(chain, prompt, char_counts, timing_data, row_num=None, ann
|
|
|
205
205
|
|
|
206
206
|
structured_chain = (
|
|
207
207
|
_PROMPT_TEMPLATE
|
|
208
|
-
| chain.with_structured_output(
|
|
208
|
+
| chain.with_structured_output(
|
|
209
|
+
AnnotationResponse, method="json_schema", include_raw=True
|
|
210
|
+
)
|
|
209
211
|
)
|
|
210
212
|
|
|
211
213
|
start_time = time.time()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codebook-lab
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: An LLM annotation experiment pipeline for computational social science.
|
|
5
5
|
Author: Lorcan McLaren
|
|
6
6
|
License-Expression: AGPL-3.0-only
|
|
@@ -45,7 +45,7 @@ Dynamic: license-file
|
|
|
45
45
|
|
|
46
46
|
# CodeBook Lab
|
|
47
47
|
|
|
48
|
-
[](https://doi.org/10.5281/zenodo.19185921)
|
|
48
|
+
[](https://doi.org/10.5281/zenodo.19185921) [](https://pypi.org/project/codebook-lab/) [](https://pypi.org/project/codebook-lab/) [](https://pypi.org/project/codebook-lab/)
|
|
49
49
|
|
|
50
50
|
CodeBook Lab is an LLM annotation experiment pipeline for computational social science. It takes a codebook and labelled dataset from [CodeBook Studio](https://codebook.streamlit.app/) ([source](https://github.com/LorcanMcLaren/codebook-studio)) and runs structured experiments across the dimensions that matter for text-as-data research: model choice, model size, prompt style, zero-shot versus few-shot learning, and sampling hyperparameters — all benchmarked against human labels.
|
|
51
51
|
|
|
@@ -297,7 +297,7 @@ This project is licensed under the [GNU Affero General Public License v3.0](http
|
|
|
297
297
|
If you use CodeBook Lab in research, please cite both:
|
|
298
298
|
|
|
299
299
|
- this software package
|
|
300
|
-
- the associated preprint
|
|
300
|
+
- the associated arXiv preprint
|
|
301
301
|
|
|
302
302
|
Citation metadata is also available in the project's [`CITATION.cff`](https://github.com/LorcanMcLaren/codebook-lab/blob/main/CITATION.cff).
|
|
303
303
|
|
|
@@ -324,7 +324,7 @@ BibTeX:
|
|
|
324
324
|
|
|
325
325
|
APSR style:
|
|
326
326
|
|
|
327
|
-
McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*.
|
|
327
|
+
McLaren, Lorcan, James P. Cross, Zuzanna Krakowska, Robin Rauner, and Martijn Schoonvelde. 2026. *Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation*. arXiv preprint arXiv:2603.26898. [https://arxiv.org/abs/2603.26898](https://arxiv.org/abs/2603.26898).
|
|
328
328
|
|
|
329
329
|
BibTeX:
|
|
330
330
|
|
|
@@ -333,6 +333,10 @@ BibTeX:
|
|
|
333
333
|
author = {McLaren, Lorcan and Cross, James P. and Krakowska, Zuzanna and Rauner, Robin and Schoonvelde, Martijn},
|
|
334
334
|
title = {Magic Words or Methodical Work? Challenging Conventional Wisdom in LLM-Based Political Text Annotation},
|
|
335
335
|
year = {2026},
|
|
336
|
-
|
|
336
|
+
eprint = {2603.26898},
|
|
337
|
+
archivePrefix = {arXiv},
|
|
338
|
+
primaryClass = {cs.CL},
|
|
339
|
+
doi = {10.48550/arXiv.2603.26898},
|
|
340
|
+
url = {https://arxiv.org/abs/2603.26898}
|
|
337
341
|
}
|
|
338
342
|
```
|
|
@@ -19,10 +19,6 @@ codebook_lab.egg-info/top_level.txt
|
|
|
19
19
|
codebook_lab/tasks/__init__.py
|
|
20
20
|
codebook_lab/tasks/policy-sentiment/codebook.json
|
|
21
21
|
codebook_lab/tasks/policy-sentiment/ground-truth.csv
|
|
22
|
-
scripts/multi_run_example.py
|
|
23
|
-
scripts/single_run_example.py
|
|
24
|
-
tests/__init__.py
|
|
25
|
-
tests/conftest.py
|
|
26
22
|
tests/test_conditions.py
|
|
27
23
|
tests/test_examples.py
|
|
28
24
|
tests/test_experiments.py
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
"""Run a small multi-experiment sweep with CodeBook Lab.
|
|
2
|
-
|
|
3
|
-
This script is intentionally small so users can test the package quickly.
|
|
4
|
-
Edit the grid below to explore more combinations once the basic workflow is
|
|
5
|
-
working in your environment. The package will try to start a local Ollama
|
|
6
|
-
server if needed and will pull any missing models automatically.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
|
|
11
|
-
from codebook_lab import run_experiment_grid
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
OUTPUT_ROOT = Path("outputs")
|
|
15
|
-
|
|
16
|
-
PARAM_GRID = {
|
|
17
|
-
"country_iso_code": "IRL",
|
|
18
|
-
"tasks": ["policy-sentiment"],
|
|
19
|
-
"models": ["gemma3:270m"],
|
|
20
|
-
"use_examples": [False, True],
|
|
21
|
-
"prompt_types": ["standard"],
|
|
22
|
-
"temperatures": [None],
|
|
23
|
-
"top_ps": [None],
|
|
24
|
-
"process_textboxes": [True],
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def main() -> None:
|
|
29
|
-
"""Run a small sweep and print a short summary of the completed runs."""
|
|
30
|
-
results = run_experiment_grid(
|
|
31
|
-
param_grid=PARAM_GRID,
|
|
32
|
-
output_root=OUTPUT_ROOT,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
print(f"Completed {len(results)} experiment runs.")
|
|
36
|
-
for result in results:
|
|
37
|
-
print(f"- {result.model_id}: {result.experiment_directory}")
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if __name__ == "__main__":
|
|
41
|
-
main()
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
"""Run one bundled-example experiment with CodeBook Lab.
|
|
2
|
-
|
|
3
|
-
Edit the constants below if you want to change the model, task, or output
|
|
4
|
-
location. This script assumes:
|
|
5
|
-
|
|
6
|
-
1. CodeBook Lab has been installed in the current environment, for example
|
|
7
|
-
with ``python -m pip install codebook-lab``.
|
|
8
|
-
2. Ollama is installed and available on PATH.
|
|
9
|
-
|
|
10
|
-
The package will try to start a local Ollama server if needed and will pull the
|
|
11
|
-
requested model automatically before running the experiment.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
from pathlib import Path
|
|
15
|
-
|
|
16
|
-
from codebook_lab import ExperimentSpec, run_experiment
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
TASK = "policy-sentiment"
|
|
20
|
-
MODEL = "gemma3:270m"
|
|
21
|
-
COUNTRY_ISO_CODE = "IRL"
|
|
22
|
-
OUTPUT_ROOT = Path("outputs")
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def main() -> None:
|
|
26
|
-
"""Run a single experiment and print the key output locations."""
|
|
27
|
-
result = run_experiment(
|
|
28
|
-
ExperimentSpec(
|
|
29
|
-
task=TASK,
|
|
30
|
-
model=MODEL,
|
|
31
|
-
use_examples=False,
|
|
32
|
-
prompt_type="standard",
|
|
33
|
-
temperature=None,
|
|
34
|
-
top_p=None,
|
|
35
|
-
process_textbox=True,
|
|
36
|
-
country_iso_code=COUNTRY_ISO_CODE,
|
|
37
|
-
),
|
|
38
|
-
output_root=OUTPUT_ROOT,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
print("Completed single experiment run.")
|
|
42
|
-
print(f"Experiment directory: {result.experiment_directory}")
|
|
43
|
-
print(f"Metrics CSV: {result.metrics.output_csv}")
|
|
44
|
-
print(f"Classification report: {result.metrics.report_file}")
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
if __name__ == "__main__":
|
|
48
|
-
main()
|
|
File without changes
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@pytest.fixture()
|
|
9
|
-
def bundled_task_dir() -> Path:
|
|
10
|
-
"""Return the path to the bundled policy-sentiment example task."""
|
|
11
|
-
task_dir = Path(__file__).resolve().parent.parent / "codebook_lab" / "tasks" / "policy-sentiment"
|
|
12
|
-
assert task_dir.exists(), f"Bundled task directory not found: {task_dir}"
|
|
13
|
-
return task_dir
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{codebook_lab-1.1.0 → codebook_lab-1.1.1}/codebook_lab/tasks/policy-sentiment/ground-truth.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|