distribird 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- distribird-0.1.0/.dockerignore +12 -0
- distribird-0.1.0/.env.example +19 -0
- distribird-0.1.0/.github/workflows/ci.yml +99 -0
- distribird-0.1.0/.github/workflows/publish.yml +29 -0
- distribird-0.1.0/.gitignore +38 -0
- distribird-0.1.0/.streamlit/config.toml +3 -0
- distribird-0.1.0/CONTRIBUTING.md +48 -0
- distribird-0.1.0/Dockerfile +17 -0
- distribird-0.1.0/LICENSE +21 -0
- distribird-0.1.0/PKG-INFO +225 -0
- distribird-0.1.0/README.md +188 -0
- distribird-0.1.0/assets/docs.html +495 -0
- distribird-0.1.0/assets/logo.svg +47 -0
- distribird-0.1.0/docker-compose.yml +22 -0
- distribird-0.1.0/examples/maize_bgcmuso/demo.py +49 -0
- distribird-0.1.0/examples/maize_bgcmuso/parameters.json +37 -0
- distribird-0.1.0/pyproject.toml +74 -0
- distribird-0.1.0/requirements.txt +1 -0
- distribird-0.1.0/src/distribird/__init__.py +3 -0
- distribird-0.1.0/src/distribird/agent/__init__.py +0 -0
- distribird-0.1.0/src/distribird/agent/agents.py +190 -0
- distribird-0.1.0/src/distribird/agent/deliberation.py +293 -0
- distribird-0.1.0/src/distribird/agent/enrich.py +133 -0
- distribird-0.1.0/src/distribird/agent/extract.py +603 -0
- distribird-0.1.0/src/distribird/agent/fulltext.py +165 -0
- distribird-0.1.0/src/distribird/agent/graph.py +182 -0
- distribird-0.1.0/src/distribird/agent/nodes.py +666 -0
- distribird-0.1.0/src/distribird/agent/pipeline.py +62 -0
- distribird-0.1.0/src/distribird/agent/prompts.py +564 -0
- distribird-0.1.0/src/distribird/agent/search.py +586 -0
- distribird-0.1.0/src/distribird/agent/search_openalex.py +144 -0
- distribird-0.1.0/src/distribird/agent/state.py +232 -0
- distribird-0.1.0/src/distribird/agent/synthesize.py +129 -0
- distribird-0.1.0/src/distribird/api/__init__.py +0 -0
- distribird-0.1.0/src/distribird/api/routes.py +103 -0
- distribird-0.1.0/src/distribird/config.py +56 -0
- distribird-0.1.0/src/distribird/distributions/__init__.py +0 -0
- distribird-0.1.0/src/distribird/distributions/constraints.py +43 -0
- distribird-0.1.0/src/distribird/distributions/fitting.py +325 -0
- distribird-0.1.0/src/distribird/distributions/uninformative.py +59 -0
- distribird-0.1.0/src/distribird/export/__init__.py +0 -0
- distribird-0.1.0/src/distribird/export/json_export.py +45 -0
- distribird-0.1.0/src/distribird/export/python_export.py +74 -0
- distribird-0.1.0/src/distribird/export/r_export.py +69 -0
- distribird-0.1.0/src/distribird/models.py +158 -0
- distribird-0.1.0/src/distribird/ui/__init__.py +0 -0
- distribird-0.1.0/src/distribird/ui/app.py +933 -0
- distribird-0.1.0/src/distribird/ui/persistence.py +112 -0
- distribird-0.1.0/streamlit_app.py +5 -0
- distribird-0.1.0/tests/__init__.py +0 -0
- distribird-0.1.0/tests/conftest.py +28 -0
- distribird-0.1.0/tests/test_agents.py +180 -0
- distribird-0.1.0/tests/test_api.py +69 -0
- distribird-0.1.0/tests/test_deliberation.py +318 -0
- distribird-0.1.0/tests/test_enrich.py +214 -0
- distribird-0.1.0/tests/test_export.py +91 -0
- distribird-0.1.0/tests/test_extract.py +367 -0
- distribird-0.1.0/tests/test_fitting.py +120 -0
- distribird-0.1.0/tests/test_graph.py +276 -0
- distribird-0.1.0/tests/test_models.py +89 -0
- distribird-0.1.0/tests/test_nodes.py +250 -0
- distribird-0.1.0/tests/test_persistence.py +53 -0
- distribird-0.1.0/tests/test_pipeline.py +190 -0
- distribird-0.1.0/tests/test_relevance.py +107 -0
- distribird-0.1.0/tests/test_search.py +379 -0
- distribird-0.1.0/tests/test_search_openalex.py +136 -0
- distribird-0.1.0/tests/test_snowball.py +154 -0
- distribird-0.1.0/tests/test_state.py +141 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Distribird configuration
|
|
2
|
+
# Copy to .env and fill in your values: cp .env.example .env
|
|
3
|
+
|
|
4
|
+
# Auth (change these!)
|
|
5
|
+
DISTRIBIRD_AUTH_USERNAME=demo
|
|
6
|
+
DISTRIBIRD_AUTH_PASSWORD=changeme
|
|
7
|
+
|
|
8
|
+
# LLM (required — any OpenAI-compatible endpoint via LiteLLM)
|
|
9
|
+
DISTRIBIRD_LLM_BASE_URL=https://your-llm-host.example.com/api
|
|
10
|
+
DISTRIBIRD_LLM_API_KEY=sk-your-key-here
|
|
11
|
+
DISTRIBIRD_LLM_MODEL=gemini-3-pro
|
|
12
|
+
|
|
13
|
+
# Semantic Scholar (optional, increases rate limits)
|
|
14
|
+
DISTRIBIRD_SEMANTIC_SCHOLAR_API_KEY=
|
|
15
|
+
|
|
16
|
+
# Deep Research (optional)
|
|
17
|
+
DISTRIBIRD_DEEP_RESEARCH_BASE_URL=
|
|
18
|
+
DISTRIBIRD_DEEP_RESEARCH_API_KEY=
|
|
19
|
+
DISTRIBIRD_DEEP_RESEARCH_MODEL=o4-mini-deep-research
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
name: Lint
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Set up Python 3.13
|
|
17
|
+
uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.13"
|
|
20
|
+
|
|
21
|
+
- name: Install dependencies
|
|
22
|
+
run: |
|
|
23
|
+
python -m pip install --upgrade pip
|
|
24
|
+
pip install -e ".[dev]"
|
|
25
|
+
|
|
26
|
+
- name: Ruff check
|
|
27
|
+
run: ruff check src/ tests/
|
|
28
|
+
|
|
29
|
+
- name: Ruff format check
|
|
30
|
+
run: ruff format --check src/ tests/
|
|
31
|
+
|
|
32
|
+
typecheck:
|
|
33
|
+
name: Type Check
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
steps:
|
|
36
|
+
- uses: actions/checkout@v4
|
|
37
|
+
|
|
38
|
+
- name: Set up Python 3.13
|
|
39
|
+
uses: actions/setup-python@v5
|
|
40
|
+
with:
|
|
41
|
+
python-version: "3.13"
|
|
42
|
+
|
|
43
|
+
- name: Install dependencies
|
|
44
|
+
run: |
|
|
45
|
+
python -m pip install --upgrade pip
|
|
46
|
+
pip install -e ".[dev]"
|
|
47
|
+
|
|
48
|
+
- name: Mypy
|
|
49
|
+
run: mypy src/distribird/
|
|
50
|
+
|
|
51
|
+
test:
|
|
52
|
+
name: Tests
|
|
53
|
+
runs-on: ubuntu-latest
|
|
54
|
+
steps:
|
|
55
|
+
- uses: actions/checkout@v4
|
|
56
|
+
|
|
57
|
+
- name: Set up Python 3.13
|
|
58
|
+
uses: actions/setup-python@v5
|
|
59
|
+
with:
|
|
60
|
+
python-version: "3.13"
|
|
61
|
+
|
|
62
|
+
- name: Install dependencies
|
|
63
|
+
run: |
|
|
64
|
+
python -m pip install --upgrade pip
|
|
65
|
+
pip install -e ".[dev]"
|
|
66
|
+
|
|
67
|
+
- name: Run tests
|
|
68
|
+
run: pytest --tb=short -q
|
|
69
|
+
|
|
70
|
+
build:
|
|
71
|
+
name: Build Package
|
|
72
|
+
runs-on: ubuntu-latest
|
|
73
|
+
steps:
|
|
74
|
+
- uses: actions/checkout@v4
|
|
75
|
+
|
|
76
|
+
- name: Set up Python 3.13
|
|
77
|
+
uses: actions/setup-python@v5
|
|
78
|
+
with:
|
|
79
|
+
python-version: "3.13"
|
|
80
|
+
|
|
81
|
+
- name: Install build tools
|
|
82
|
+
run: pip install --upgrade pip build
|
|
83
|
+
|
|
84
|
+
- name: Build sdist and wheel
|
|
85
|
+
run: python -m build
|
|
86
|
+
|
|
87
|
+
- name: Check package metadata
|
|
88
|
+
run: |
|
|
89
|
+
pip install twine
|
|
90
|
+
twine check dist/*
|
|
91
|
+
|
|
92
|
+
docker:
|
|
93
|
+
name: Docker Build
|
|
94
|
+
runs-on: ubuntu-latest
|
|
95
|
+
steps:
|
|
96
|
+
- uses: actions/checkout@v4
|
|
97
|
+
|
|
98
|
+
- name: Build Docker image
|
|
99
|
+
run: docker build -t distribird:ci .
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
name: Build & Publish
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
environment: pypi
|
|
12
|
+
permissions:
|
|
13
|
+
id-token: write
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Set up Python
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.13"
|
|
21
|
+
|
|
22
|
+
- name: Install build tools
|
|
23
|
+
run: pip install --upgrade pip build
|
|
24
|
+
|
|
25
|
+
- name: Build sdist and wheel
|
|
26
|
+
run: python -m build
|
|
27
|
+
|
|
28
|
+
- name: Publish to PyPI
|
|
29
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
*.egg
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
ENV/
|
|
14
|
+
|
|
15
|
+
# Environment / secrets
|
|
16
|
+
.env
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.idea/
|
|
20
|
+
.vscode/
|
|
21
|
+
*.swp
|
|
22
|
+
*.swo
|
|
23
|
+
*~
|
|
24
|
+
|
|
25
|
+
# Testing / linting caches
|
|
26
|
+
.pytest_cache/
|
|
27
|
+
.ruff_cache/
|
|
28
|
+
.mypy_cache/
|
|
29
|
+
htmlcov/
|
|
30
|
+
.coverage
|
|
31
|
+
coverage.xml
|
|
32
|
+
|
|
33
|
+
# OS
|
|
34
|
+
.DS_Store
|
|
35
|
+
Thumbs.db
|
|
36
|
+
|
|
37
|
+
# Claude Code
|
|
38
|
+
.claude/
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Contributing to Distribird
|
|
2
|
+
|
|
3
|
+
Contributions are welcome! Here's how to get started.
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/HUN-REN-AI1Science/Distribird.git
|
|
9
|
+
cd Distribird
|
|
10
|
+
python -m venv .venv
|
|
11
|
+
source .venv/bin/activate
|
|
12
|
+
pip install -e ".[dev]"
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Running Tests
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pytest
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Code Style
|
|
22
|
+
|
|
23
|
+
We use ruff for linting and formatting:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
ruff check src/ tests/
|
|
27
|
+
ruff format src/ tests/
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Type checking with mypy:
|
|
31
|
+
```bash
|
|
32
|
+
mypy src/distribird/
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Pull Requests
|
|
36
|
+
|
|
37
|
+
1. Fork the repository
|
|
38
|
+
2. Create a feature branch
|
|
39
|
+
3. Add tests for new functionality
|
|
40
|
+
4. Ensure all tests pass
|
|
41
|
+
5. Submit a pull request
|
|
42
|
+
|
|
43
|
+
## Reporting Issues
|
|
44
|
+
|
|
45
|
+
Please open an issue on GitHub with:
|
|
46
|
+
- A clear description of the problem
|
|
47
|
+
- Steps to reproduce
|
|
48
|
+
- Expected vs actual behavior
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
FROM python:3.13-slim
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
# System deps for matplotlib
|
|
6
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
7
|
+
libglib2.0-0 libsm6 libxrender1 libxext6 && \
|
|
8
|
+
rm -rf /var/lib/apt/lists/*
|
|
9
|
+
|
|
10
|
+
COPY pyproject.toml README.md ./
|
|
11
|
+
COPY src/ src/
|
|
12
|
+
|
|
13
|
+
RUN pip install --no-cache-dir .
|
|
14
|
+
|
|
15
|
+
COPY examples/ examples/
|
|
16
|
+
|
|
17
|
+
EXPOSE 8501 8000
|
distribird-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Roland Hollos, Patrik P. Süli
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: distribird
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Literature-informed prior distributions for Bayesian model calibration
|
|
5
|
+
Project-URL: Repository, https://github.com/HUN-REN-AI1Science/Distribird
|
|
6
|
+
Project-URL: Documentation, https://distribird.streamlit.app
|
|
7
|
+
Author: Roland Hollos, Patrik P. Süli
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: bayesian,calibration,distribution,literature,prior
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: fastapi>=0.104
|
|
18
|
+
Requires-Dist: httpx>=0.25
|
|
19
|
+
Requires-Dist: langgraph>=1.0
|
|
20
|
+
Requires-Dist: matplotlib>=3.7
|
|
21
|
+
Requires-Dist: numpy>=1.24
|
|
22
|
+
Requires-Dist: openai>=1.0
|
|
23
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
24
|
+
Requires-Dist: pydantic>=2.0
|
|
25
|
+
Requires-Dist: pymupdf>=1.24
|
|
26
|
+
Requires-Dist: scipy>=1.11
|
|
27
|
+
Requires-Dist: streamlit-local-storage>=0.0.21
|
|
28
|
+
Requires-Dist: streamlit>=1.28
|
|
29
|
+
Requires-Dist: uvicorn[standard]>=0.24
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: mypy>=1.7; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest>=7.4; extra == 'dev'
|
|
34
|
+
Requires-Dist: respx>=0.20; extra == 'dev'
|
|
35
|
+
Requires-Dist: ruff>=0.1; extra == 'dev'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
<p align="center">
|
|
39
|
+
<img src="https://raw.githubusercontent.com/HUN-REN-AI1Science/Distribird/main/assets/logo.svg" alt="Distribird" width="360">
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
<p align="center">
|
|
43
|
+
<strong>Automated Bayesian prior construction from scientific literature</strong>
|
|
44
|
+
</p>
|
|
45
|
+
|
|
46
|
+
<p align="center">
|
|
47
|
+
<a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.10%2B-3776AB?logo=python&logoColor=white" alt="Python 3.10+"></a>
|
|
48
|
+
<a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License: MIT"></a>
|
|
49
|
+
<img src="https://img.shields.io/badge/tests-148%20passed-brightgreen" alt="Tests: 148 passed">
|
|
50
|
+
<img src="https://img.shields.io/badge/lint-ruff-261230?logo=ruff&logoColor=D7FF64" alt="Linting: ruff">
|
|
51
|
+
<a href="https://langgraph.dev/"><img src="https://img.shields.io/badge/orchestration-LangGraph-1C3C3C?logo=langchain&logoColor=white" alt="LangGraph"></a>
|
|
52
|
+
<a href="https://fastapi.tiangolo.com"><img src="https://img.shields.io/badge/API-FastAPI-009688?logo=fastapi&logoColor=white" alt="FastAPI"></a>
|
|
53
|
+
<a href="https://distribird.streamlit.app"><img src="https://static.streamlit.io/badges/streamlit_badge_black_white.svg" alt="Open in Streamlit"></a>
|
|
54
|
+
</p>
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
**Distribird** turns a parameter name and description into a fully cited, publication-ready prior distribution.
|
|
59
|
+
It queries Semantic Scholar, OpenAlex, and LLM deep-research agents in parallel, extracts numerical values from the papers they return, and fits the best-matching `scipy.stats` distribution via AIC selection.
|
|
60
|
+
|
|
61
|
+
> *"I need a prior for maximum leaf area index of maize."*
|
|
62
|
+
>
|
|
63
|
+
> ➜ `truncated_normal(mu=5.2, sigma=1.5, a=0, b=12)` — fitted from 6 peer-reviewed sources with full citations.
|
|
64
|
+
|
|
65
|
+
<p align="center">
|
|
66
|
+
<a href="https://distribird.streamlit.app"><img src="https://img.shields.io/badge/Try_it_now-distribird.streamlit.app-2d6a4f?style=for-the-badge" alt="Try it now"></a>
|
|
67
|
+
</p>
|
|
68
|
+
|
|
69
|
+
## Why Distribird?
|
|
70
|
+
|
|
71
|
+
Bayesian calibration requires informative priors, but building them from literature is tedious.
|
|
72
|
+
Researchers default to flat priors, losing valuable domain knowledge.
|
|
73
|
+
Distribird closes that gap: **describe your parameter, get a defensible prior in seconds.**
|
|
74
|
+
|
|
75
|
+
## Architecture
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
┌──────────────────────────────┐
|
|
79
|
+
│ LangGraph DAG │
|
|
80
|
+
└──────────────────────────────┘
|
|
81
|
+
|
|
82
|
+
START ─► Enrich ─► QueryGen ─► Search ─► RelevanceJudge ──┬► CrossEnrich ───┐
|
|
83
|
+
▲ │ │
|
|
84
|
+
│ └► FetchFulltext ◄┘
|
|
85
|
+
RefineSearch │
|
|
86
|
+
▲ Extract
|
|
87
|
+
│ │
|
|
88
|
+
│ RefineExtraction ◄─── QualityGate
|
|
89
|
+
│ │ │ │
|
|
90
|
+
│ └────────────────┘ │
|
|
91
|
+
│ ▼
|
|
92
|
+
└───────────────────────── Synthesize ─► END
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Multi-agent search** — Semantic Scholar, OpenAlex, and LLM deep-research agents run concurrently; a moderator LLM selects the best papers via deliberation.
|
|
96
|
+
|
|
97
|
+
**Relevance scoring** — An LLM-based relevance judge scores each paper before extraction. When multiple high-relevance papers are found, the pipeline routes through cross-enrichment (citation snowballing + follow-up queries) to discover additional sources.
|
|
98
|
+
|
|
99
|
+
**Feedback loops** — A quality gate inspects extraction results and can trigger search refinement (new queries) or extraction refinement (web-assisted re-extraction) before falling through to synthesis.
|
|
100
|
+
|
|
101
|
+
**Budget-bounded** — `IterationBudget` caps every loop to guarantee termination.
|
|
102
|
+
|
|
103
|
+
**Live progress** — The pipeline streams node-by-node updates to the UI, showing which step is running, paper/value counts, and per-parameter progress bars.
|
|
104
|
+
|
|
105
|
+
## Quickstart
|
|
106
|
+
|
|
107
|
+
### Install
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pip install distribird
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
<details>
|
|
114
|
+
<summary><strong>Development install</strong></summary>
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
git clone https://github.com/HUN-REN-AI1Science/Distribird.git
|
|
118
|
+
cd Distribird
|
|
119
|
+
python -m venv .venv && source .venv/bin/activate
|
|
120
|
+
pip install -e ".[dev]"
|
|
121
|
+
pytest # 148 tests, all passing
|
|
122
|
+
```
|
|
123
|
+
</details>
|
|
124
|
+
|
|
125
|
+
### Configure
|
|
126
|
+
|
|
127
|
+
Distribird reads configuration from environment variables (prefix `DISTRIBIRD_`) or a `.env` file in the project root.
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
# .env (or export these in your shell)
|
|
131
|
+
DISTRIBIRD_LLM_BASE_URL="http://localhost:4000" # any OpenAI-compatible endpoint
|
|
132
|
+
DISTRIBIRD_LLM_API_KEY="your-key"
|
|
133
|
+
DISTRIBIRD_LLM_MODEL="gpt-4o"
|
|
134
|
+
DISTRIBIRD_SEMANTIC_SCHOLAR_API_KEY="" # optional, increases rate limits
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**Sidebar behaviour in the Streamlit UI:**
|
|
138
|
+
|
|
139
|
+
- Settings provided in `.env` are used automatically — no manual input needed.
|
|
140
|
+
- Settings **not** provided in `.env` appear as required fields in the sidebar; the user must fill them in before generation can start.
|
|
141
|
+
- An **"Override configured settings"** toggle lets users temporarily replace `.env` values without editing the file.
|
|
142
|
+
- Literature source toggles (Semantic Scholar, OpenAlex, LLM Web Search, LLM Deep Research) are always visible and control which connection fields are required.
|
|
143
|
+
|
|
144
|
+
### Use
|
|
145
|
+
|
|
146
|
+
**Python**
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
import asyncio
|
|
150
|
+
from distribird.agent.pipeline import run_parameter
|
|
151
|
+
from distribird.models import ParameterInput, ConstraintSpec
|
|
152
|
+
|
|
153
|
+
result = asyncio.run(run_parameter(
|
|
154
|
+
ParameterInput(
|
|
155
|
+
name="max_lai",
|
|
156
|
+
description="Maximum leaf area index of maize",
|
|
157
|
+
unit="m2/m2",
|
|
158
|
+
domain_context="Biome-BGCMuSo maize crop modeling",
|
|
159
|
+
constraints=ConstraintSpec(lower_bound=0, upper_bound=12),
|
|
160
|
+
)
|
|
161
|
+
))
|
|
162
|
+
|
|
163
|
+
print(result.prior.display_name()) # truncated_normal(mu=5.2, sigma=1.5, a=0, b=12)
|
|
164
|
+
print(result.prior.n_sources) # 6
|
|
165
|
+
print(result.prior.confidence.value) # high
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
**REST API**
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
distribird-api # starts on :8000
|
|
172
|
+
|
|
173
|
+
curl -u demo:distribird2026 -X POST http://localhost:8000/api/v1/parameter \
|
|
174
|
+
-H "Content-Type: application/json" \
|
|
175
|
+
-d '{"name":"max_lai","description":"Maximum leaf area index of maize","unit":"m2/m2"}'
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
**Streamlit UI**
|
|
179
|
+
|
|
180
|
+
Try the hosted version at **[distribird.streamlit.app](https://distribird.streamlit.app)**, or run locally:
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
streamlit run src/distribird/ui/app.py
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Prior Fitting Strategy
|
|
187
|
+
|
|
188
|
+
| Evidence | Method | Confidence |
|
|
189
|
+
|---|---|---|
|
|
190
|
+
| 5+ values | AIC across Normal, Truncated Normal, Gamma, Log-Normal, Beta | **High** |
|
|
191
|
+
| 2 – 4 values | Moment matching with widened σ | Medium |
|
|
192
|
+
| 1 value | Wide Normal centered on value | Low |
|
|
193
|
+
| 0 values | Jeffreys / wide uninformative prior | None |
|
|
194
|
+
|
|
195
|
+
All fitted distributions respect user-specified physical constraints (bounds).
|
|
196
|
+
|
|
197
|
+
## Export Formats
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
from distribird.export.json_export import export_json
|
|
201
|
+
from distribird.export.r_export import export_r
|
|
202
|
+
from distribird.export.python_export import export_python
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
| Format | Output |
|
|
206
|
+
|---|---|
|
|
207
|
+
| **JSON** | Parameter name, family, params, citations, confidence |
|
|
208
|
+
| **R** | Executable R script with distribution calls |
|
|
209
|
+
| **Python** | `scipy.stats` code ready for MCMC samplers |
|
|
210
|
+
|
|
211
|
+
## Demo
|
|
212
|
+
|
|
213
|
+
A complete worked example using five Biome-BGCMuSo maize parameters:
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
python examples/maize_bgcmuso/demo.py
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Testing
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
pytest # 148 tests
|
|
223
|
+
ruff check src/ tests/ # lint
|
|
224
|
+
mypy src/distribird/ # type checking (strict)
|
|
225
|
+
```
|