physical-reasoning-toolkit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- physical_reasoning_toolkit-0.1.0/LICENSE +21 -0
- physical_reasoning_toolkit-0.1.0/MANIFEST.in +15 -0
- physical_reasoning_toolkit-0.1.0/PKG-INFO +372 -0
- physical_reasoning_toolkit-0.1.0/README.md +316 -0
- physical_reasoning_toolkit-0.1.0/pyproject.toml +111 -0
- physical_reasoning_toolkit-0.1.0/setup.cfg +4 -0
- physical_reasoning_toolkit-0.1.0/src/physical_reasoning_toolkit.egg-info/PKG-INFO +372 -0
- physical_reasoning_toolkit-0.1.0/src/physical_reasoning_toolkit.egg-info/SOURCES.txt +82 -0
- physical_reasoning_toolkit-0.1.0/src/physical_reasoning_toolkit.egg-info/dependency_links.txt +1 -0
- physical_reasoning_toolkit-0.1.0/src/physical_reasoning_toolkit.egg-info/requires.txt +36 -0
- physical_reasoning_toolkit-0.1.0/src/physical_reasoning_toolkit.egg-info/top_level.txt +1 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/__init__.py +37 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/__init__.py +14 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/annotations/__init__.py +14 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/annotations/domain.py +24 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/annotations/theorem.py +15 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workers/__init__.py +14 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workers/base.py +86 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workers/domain_labeler.py +128 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workers/theorem_detector.py +116 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workers/variable_locator.py +171 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/__init__.py +12 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/modules/__init__.py +16 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/modules/base_module.py +213 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/modules/detect_theorem_module.py +232 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/modules/domain_assessment_module.py +141 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/modules/review_theorem_module.py +574 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/presets/__init__.py +14 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/presets/domain_only_workflow.py +70 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/presets/theorem_label_only_workflow.py +69 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_annotation/workflows/workflow_composer.py +639 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/__init__.py +11 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/domain/__init__.py +31 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/domain/answer.py +236 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/domain/answer_category.py +30 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/domain/physics_dataset.py +376 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/domain/physics_domain.py +68 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/domain/physics_problem.py +397 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/domain/physics_solution.py +200 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/logging_config.py +597 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/ARCHITECTURE.md +188 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/__init__.py +40 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/base.py +49 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/deepseek.py +53 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/factory.py +87 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/gemini.py +95 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/ollama.py +200 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/openai.py +198 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_core/model_clients/utils.py +22 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/__init__.py +75 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/citations.py +115 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/downloaders/__init__.py +23 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/downloaders/base_downloader.py +334 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/downloaders/phybench_downloader.py +324 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/downloaders/physreason_downloader.py +292 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/downloaders/phyx_downloader.py +809 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/downloaders/seephys_downloader.py +519 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/downloaders/ugphysics_downloader.py +325 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/hub.py +334 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/__init__.py +23 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/base_loader.py +795 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/jeebench_loader.py +339 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/phybench_loader.py +168 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/physreason_loader.py +274 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/phyx_loader.py +232 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/seephys_loader.py +269 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/tpbench_loader.py +314 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/loaders/ugphysics_loader.py +322 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_datasets/utils.py +263 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/__init__.py +17 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/comparator/__init__.py +18 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/comparator/base.py +75 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/comparator/category_match.py +315 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/comparator/exact_match.py +55 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/comparator/normalized_match.py +93 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/evaluator/__init__.py +14 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/evaluator/accuracy.py +252 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/evaluator/base.py +67 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/utils/NORMALIZATION.md +226 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/utils/__init__.py +31 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/utils/answer_utils.py +23 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/utils/latex_symbol_preprocess.py +49 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/utils/normalization.py +491 -0
- physical_reasoning_toolkit-0.1.0/src/prkit/prkit_evaluation/utils/number_utils.py +47 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 physical-reasoning-toolkit contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Include documentation files
|
|
2
|
+
include README.md
|
|
3
|
+
include LICENSE
|
|
4
|
+
include pyproject.toml
|
|
5
|
+
|
|
6
|
+
# Include all markdown and text files in the package
|
|
7
|
+
recursive-include src/prkit *.md
|
|
8
|
+
recursive-include src/prkit *.txt
|
|
9
|
+
recursive-include src/prkit *.rst
|
|
10
|
+
|
|
11
|
+
# Exclude unnecessary files
|
|
12
|
+
recursive-exclude * __pycache__
|
|
13
|
+
recursive-exclude * *.py[co]
|
|
14
|
+
recursive-exclude * .DS_Store
|
|
15
|
+
exclude .gitignore
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: physical-reasoning-toolkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Physical Reasoning Toolkit
|
|
5
|
+
Author-email: Yinghuan Zhang <yinghuan.flash@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/sherryzyh/physical_reasoning_toolkit
|
|
8
|
+
Project-URL: Documentation, https://github.com/sherryzyh/physical_reasoning_toolkit#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/sherryzyh/physical_reasoning_toolkit
|
|
10
|
+
Project-URL: Issues, https://github.com/sherryzyh/physical_reasoning_toolkit/issues
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: pandas>=2.3.1
|
|
23
|
+
Requires-Dist: numpy>=2.2.6
|
|
24
|
+
Requires-Dist: openai>=1.99.9
|
|
25
|
+
Requires-Dist: pydantic>=2.11.7
|
|
26
|
+
Requires-Dist: tqdm>=4.67.1
|
|
27
|
+
Requires-Dist: google-genai>=1.0.0
|
|
28
|
+
Requires-Dist: sympy>=1.14.0
|
|
29
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
30
|
+
Requires-Dist: latex2sympy2-extended>=1.0.0
|
|
31
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
32
|
+
Requires-Dist: datasets>=2.14.0
|
|
33
|
+
Requires-Dist: Pillow>=10.0.0
|
|
34
|
+
Requires-Dist: pip>=26.0
|
|
35
|
+
Requires-Dist: ollama>=0.1.0
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
39
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
40
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
41
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
42
|
+
Provides-Extra: docs
|
|
43
|
+
Requires-Dist: sphinx>=5.0.0; extra == "docs"
|
|
44
|
+
Requires-Dist: myst-parser>=1.0.0; extra == "docs"
|
|
45
|
+
Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
|
|
46
|
+
Provides-Extra: all
|
|
47
|
+
Requires-Dist: pytest>=7.0.0; extra == "all"
|
|
48
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "all"
|
|
49
|
+
Requires-Dist: black>=23.0.0; extra == "all"
|
|
50
|
+
Requires-Dist: isort>=5.12.0; extra == "all"
|
|
51
|
+
Requires-Dist: mypy>=1.0.0; extra == "all"
|
|
52
|
+
Requires-Dist: sphinx>=5.0.0; extra == "all"
|
|
53
|
+
Requires-Dist: myst-parser>=1.0.0; extra == "all"
|
|
54
|
+
Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "all"
|
|
55
|
+
Dynamic: license-file
|
|
56
|
+
|
|
57
|
+
# Physical Reasoning Toolkit 🔬
|
|
58
|
+
|
|
59
|
+
A unified toolkit for researchers and engineers working on **AI physical reasoning**. PRKit provides a shared foundation for representing physics problems, running inference with multiple model providers, evaluating outputs with physics-aware comparators, and building structured annotation workflows.
|
|
60
|
+
|
|
61
|
+
PRKit applies a “unified interface” idea to the full physical-reasoning loop (data ↔ annotation ↔ inference ↔ evaluation), rather than focusing on datasets alone.
|
|
62
|
+
|
|
63
|
+
## 🎯 Project Overview
|
|
64
|
+
|
|
65
|
+
PRKit centers on **core components** that define the physical reasoning ontology. Three integrated subpackages build on this foundation:
|
|
66
|
+
|
|
67
|
+
- **Core components**: `PhysicsDomain`, `AnswerCategory`, `PhysicsProblem`, `Answer`, `PhysicalDataset`, `PhysicsSolution`, `BaseModelClient`, `create_model_client`, `PRKitLogger`—the shared abstractions used across the toolkit.
|
|
68
|
+
- **`prkit_datasets`**: A Datasets-like hub that downloads/loads benchmarks into the unified schema (`PhysicsProblem`, `PhysicalDataset`).
|
|
69
|
+
- **`prkit_annotation`**: Workflow-oriented tools for structured, lower-level labels (e.g., domain/subdomain, theorem usage).
|
|
70
|
+
- **`prkit_evaluation`**: Evaluate-like components for physics-oriented scoring and comparison (e.g., symbolic/numerical answer matching).
|
|
71
|
+
|
|
72
|
+
### 💡 Quick Example
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from prkit.prkit_datasets import DatasetHub
|
|
76
|
+
from prkit.prkit_core.model_clients import create_model_client
|
|
77
|
+
|
|
78
|
+
# Load any benchmark into the unified schema (PhysicsProblem, PhysicalDataset)
|
|
79
|
+
dataset = DatasetHub.load("physreason", variant="full", split="test")
|
|
80
|
+
|
|
81
|
+
# Run inference with the unified model client (core component)
|
|
82
|
+
client = create_model_client("gpt-4.1-mini")
|
|
83
|
+
for problem in dataset[:3]:
|
|
84
|
+
print(client.chat(problem.question)[:200])
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
The same pattern works across different datasets and model providers—swap the dataset name or model identifier.
|
|
88
|
+
|
|
89
|
+
### 📖 Documentation
|
|
90
|
+
|
|
91
|
+
**Quick Links:**
|
|
92
|
+
- 🔧 **[CORE.md](CORE.md)** - Core components: domain model, model client, logger, and definitions
|
|
93
|
+
- 📚 **[DATASETS.md](DATASETS.md)** - Complete guide to supported datasets and benchmarks
|
|
94
|
+
- 📊 **[EVALUATION.md](EVALUATION.md)** - Evaluation metrics and comparison strategies
|
|
95
|
+
- 📝 **[CHANGELOG.md](CHANGELOG.md)** - Version history and release notes
|
|
96
|
+
|
|
97
|
+
## 🏗️ Repository Structure
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
physical_reasoning_toolkit/
|
|
101
|
+
├── src/prkit/ # Main package (modern src-layout)
|
|
102
|
+
│ ├── prkit_core/ # Core components (domain models, model clients, logging)
|
|
103
|
+
│ ├── prkit_datasets/ # Dataset loading and management
|
|
104
|
+
│ ├── prkit_annotation/ # Annotation workflows and tools
|
|
105
|
+
│ └── prkit_evaluation/ # Evaluation metrics and benchmarks
|
|
106
|
+
├── tests/ # Unit tests
|
|
107
|
+
├── pyproject.toml # Package configuration
|
|
108
|
+
├── LICENSE # MIT License
|
|
109
|
+
└── README.md # This file
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
**Note**: The actual dataset files are stored externally (see the Data Directory Setup section). This repository contains only the toolkit code, examples, and documentation.
|
|
113
|
+
|
|
114
|
+
### What's Included vs. External
|
|
115
|
+
|
|
116
|
+
**In Repository (Code & Documentation):**
|
|
117
|
+
- ✅ **src/prkit/**: Complete toolkit with core components and 3 subpackages
|
|
118
|
+
- ✅ **tests/**: Unit tests (for contributors)
|
|
119
|
+
|
|
120
|
+
**External (Data & Runtime):**
|
|
121
|
+
- 📁 **Data Directory**: Dataset files (set via `DATASET_CACHE_DIR`)
|
|
122
|
+
- 🔑 **API Keys**: Model provider credentials (if applicable)
|
|
123
|
+
- 📊 **Log Files**: Runtime logs (default: `{cwd}/prkit_logs/prkit.log`, can be overridden via `PRKIT_LOG_FILE`)
|
|
124
|
+
|
|
125
|
+
## 🚀 Quick Start
|
|
126
|
+
|
|
127
|
+
### Prerequisites
|
|
128
|
+
- **Python 3.10+** (required)
|
|
129
|
+
|
|
130
|
+
### Installation
|
|
131
|
+
|
|
132
|
+
#### Option 1: Install from PyPI (Recommended)
|
|
133
|
+
```bash
|
|
134
|
+
# Install the latest stable version
|
|
135
|
+
pip install physical-reasoning-toolkit
|
|
136
|
+
|
|
137
|
+
# Verify installation
|
|
138
|
+
python -c "import prkit; print(prkit.__version__)"
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
#### Option 2: Install from Source
|
|
142
|
+
|
|
143
|
+
**Step 1: Clone the Repository**
|
|
144
|
+
```bash
|
|
145
|
+
git clone https://github.com/sherryzyh/physical_reasoning_toolkit.git
|
|
146
|
+
cd physical_reasoning_toolkit
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Step 2: Set Up Virtual Environment**
|
|
150
|
+
```bash
|
|
151
|
+
# Create virtual environment
|
|
152
|
+
python -m venv venv
|
|
153
|
+
|
|
154
|
+
# Activate (macOS/Linux)
|
|
155
|
+
source venv/bin/activate
|
|
156
|
+
|
|
157
|
+
# Activate (Windows)
|
|
158
|
+
venv\Scripts\activate
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**Step 3: Install**
|
|
162
|
+
```bash
|
|
163
|
+
# Install the package (regular install for end users)
|
|
164
|
+
pip install .
|
|
165
|
+
|
|
166
|
+
# Verify installation
|
|
167
|
+
python -c "import prkit; print('✅ Toolkit installed successfully!')"
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
<!-- #### Option 3: Install from Source (For Development)
|
|
171
|
+
|
|
172
|
+
If you plan to contribute or modify the code, install in editable mode with dev dependencies:
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
# After cloning and activating venv (see Steps 1–2 above)
|
|
176
|
+
pip install -e ".[dev]"
|
|
177
|
+
``` -->
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
### Provider API Key Setup
|
|
181
|
+
```bash
|
|
182
|
+
# For model provider integration (optional)
|
|
183
|
+
export OPENAI_API_KEY="your-openai-api-key"
|
|
184
|
+
export GEMINI_API_KEY="your-gemini-api-key"
|
|
185
|
+
export DEEPSEEK_API_KEY="your-deepseek-api-key"
|
|
186
|
+
|
|
187
|
+
# For logging configuration (optional)
|
|
188
|
+
export PRKIT_LOG_LEVEL=INFO
|
|
189
|
+
export PRKIT_LOG_FILE=/var/log/prkit.log # Optional: defaults to {cwd}/prkit_logs/prkit.log if not set
|
|
190
|
+
```
|
|
191
|
+
📖 **See [CORE.md](CORE.md) (Model Client section) for supported providers and usage.**
|
|
192
|
+
|
|
193
|
+
### Data Directory Setup
|
|
194
|
+
```bash
|
|
195
|
+
# Set up data directory structure (external to repository)
|
|
196
|
+
mkdir -p ~/data
|
|
197
|
+
export DATASET_CACHE_DIR=~/data
|
|
198
|
+
|
|
199
|
+
# Download datasets using DatasetHub with auto_download=True
|
|
200
|
+
python -c "from prkit.prkit_datasets import DatasetHub; DatasetHub.load('ugphysics', auto_download=True)"
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
**Note**: The data directory is external to the repository and contains the actual dataset files. The default cache directory is `~/PHYSICAL_REASONING_DATASETS/` if `DATASET_CACHE_DIR` is not set. Use `auto_download=True` when loading datasets to automatically download them if they don't exist.
|
|
204
|
+
|
|
205
|
+
### Validate Setup
|
|
206
|
+
```bash
|
|
207
|
+
python -c "
|
|
208
|
+
import prkit
|
|
209
|
+
from prkit.prkit_datasets import DatasetHub
|
|
210
|
+
from prkit.prkit_annotation.workflows import WorkflowComposer
|
|
211
|
+
print('✅ All packages imported successfully!')
|
|
212
|
+
print(f'PRKit version: {prkit.__version__}')
|
|
213
|
+
"
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## 📦 Package Overview
|
|
217
|
+
|
|
218
|
+
The toolkit is organized around **core components** and three subpackages that use them. Subpackages depend only on `prkit_core`; there are no direct dependencies between `prkit_datasets`, `prkit_annotation`, and `prkit_evaluation`.
|
|
219
|
+
|
|
220
|
+
| Component | Purpose |
|
|
221
|
+
|-----------|---------|
|
|
222
|
+
| `prkit_core` | Core components, see below |
|
|
223
|
+
| `prkit_datasets` | Dataset hub: loaders, downloaders, unified schema |
|
|
224
|
+
| `prkit_evaluation` | Comparators and accuracy metrics |
|
|
225
|
+
| `prkit_annotation` | Workflow pipelines for domain/theorem annotation |
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
### Core Components 🔧
|
|
229
|
+
|
|
230
|
+
The essential building blocks of the physical-reasoning-toolkit. All datasets, inference, evaluation, and annotation workflows use these components.
|
|
231
|
+
|
|
232
|
+
* **PhysicsDomain** — Enumeration of physics subfields (mechanics, thermodynamics, quantum mechanics, optics, etc.) for problem classification. Aligned with UGPhysics, PHYBench, TPBench. Use `PhysicsDomain.from_string()` for flexible parsing.
|
|
233
|
+
* **AnswerCategory** — Enumeration of answer types for normalization and evaluation: `NUMBER`, `PHYSICAL_QUANTITY`, `EQUATION`, `FORMULA`, `TEXT`, `OPTION`. Drives how answers are compared (numerical precision, symbolic equivalence, exact match).
|
|
234
|
+
* **PhysicsProblem** — The canonical representation of a physics problem. Required: `problem_id`, `question`. Optional: `answer` (Answer), `solution`, `domain`, `image_path`, `problem_type` (MC/OE), `options`, `correct_option`. Supports dictionary-like access and `load_images()` for visual problems.
|
|
235
|
+
* **Answer** — Unified answer model. `value` holds the number (NUMBER), numeric part (PHYSICAL_QUANTITY), option string (OPTION), or plain string (EQUATION, FORMULA, TEXT). `unit` is optional and used only for PHYSICAL_QUANTITY. Also provides type checks, unit helpers, LaTeX handling, and option indexing.
|
|
236
|
+
* **PhysicalDataset** — Collection of `PhysicsProblem` instances. Supports indexing, slicing, `get_by_id()`, `filter_by_domain()`, `take()`, `sample()`, and `save_to_json()` / `from_json()`. Provides `get_statistics()` for domain and problem-type distribution.
|
|
237
|
+
* **PhysicsSolution** — Bundles a `PhysicsProblem`, model `agent_answer`, and optional `intermediate_steps`. Captures the full solution trace for evaluation and analysis.
|
|
238
|
+
* **BaseModelClient** — Abstract base for model clients. Subclasses implement `chat(user_prompt, image_paths=None)`.
|
|
239
|
+
* **PRKitLogger** — Centralized logging with colored output, file logging, and env config (`PRKIT_LOG_LEVEL`, `PRKIT_LOG_FILE`, etc.).
|
|
240
|
+
|
|
241
|
+
📖 See [CORE.md](CORE.md) for the full domain model, entity relationships, subpackage dependency diagram, and import reference.
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
### prkit_evaluation 📈
|
|
245
|
+
Answer comparators (symbolic, numerical, textual, option-based), accuracy evaluator, and physics-focused assessment protocols.
|
|
246
|
+
|
|
247
|
+
📖 [EVALUATION.md](EVALUATION.md)
|
|
248
|
+
|
|
249
|
+
### prkit_datasets 📊
|
|
250
|
+
Dataset hub with a Datasets-like interface: `DatasetHub.load()` for PHYBench, PhysReason, UGPhysics, SeePhys, PhyX (plus JEEBench, TPBench loaders). Auto-download, variant selection, and reproducible sampling.
|
|
251
|
+
|
|
252
|
+
📖 [DATASETS.md](DATASETS.md)
|
|
253
|
+
|
|
254
|
+
### prkit_annotation 🏷️
|
|
255
|
+
Modular workflows (domain classification, theorem extraction) via `WorkflowComposer` and presets. Model-assisted and human-in-the-loop.
|
|
256
|
+
|
|
257
|
+
📖 [ANNOTATION.md](ANNOTATION.md)
|
|
258
|
+
|
|
259
|
+
## 🆘 Troubleshooting
|
|
260
|
+
|
|
261
|
+
### Common Issues
|
|
262
|
+
|
|
263
|
+
#### Python Version Problems
|
|
264
|
+
```bash
|
|
265
|
+
# Check Python version
|
|
266
|
+
python --version # Should be 3.10+
|
|
267
|
+
|
|
268
|
+
# If using the wrong version, recreate the virtual environment with a Python 3.10+ interpreter
|
|
269
|
+
python -m venv venv
|
|
270
|
+
source venv/bin/activate
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
#### Import Errors
|
|
274
|
+
```bash
|
|
275
|
+
# Reinstall in development mode
|
|
276
|
+
pip install -e .
|
|
277
|
+
|
|
278
|
+
# Check installation
|
|
279
|
+
pip show physical-reasoning-toolkit
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
#### Data Directory Issues
|
|
283
|
+
```bash
|
|
284
|
+
# Set data directory (external to repository)
|
|
285
|
+
export DATASET_CACHE_DIR=/path/to/your/data
|
|
286
|
+
|
|
287
|
+
# Check directory structure
|
|
288
|
+
ls -la $DATASET_CACHE_DIR
|
|
289
|
+
|
|
290
|
+
# Verify dataset files exist
|
|
291
|
+
ls -la $DATASET_CACHE_DIR/ugphysics/
|
|
292
|
+
ls -la $DATASET_CACHE_DIR/PhysReason/
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
### Getting Help
|
|
296
|
+
1. **Review logs**: Check logging output for detailed error information
|
|
297
|
+
2. **Verify setup**: Run the commands in the "Validate Setup" section above
|
|
298
|
+
3. **Check data**: Ensure datasets are properly downloaded and accessible
|
|
299
|
+
4. **Check documentation**: Start with the root docs linked in the Documentation section above
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
## 🤝 Contributing
|
|
303
|
+
|
|
304
|
+
### Community & Support
|
|
305
|
+
- **GitHub Issues**: [Report bugs or request features](https://github.com/sherryzyh/physical_reasoning_toolkit/issues)
|
|
306
|
+
- **Discussions**: Share ideas and get help
|
|
307
|
+
|
|
308
|
+
### Development Setup
|
|
309
|
+
```bash
|
|
310
|
+
# Clone and install in development mode
|
|
311
|
+
git clone https://github.com/sherryzyh/physical_reasoning_toolkit.git
|
|
312
|
+
cd physical_reasoning_toolkit
|
|
313
|
+
pip install -e ".[dev]"
|
|
314
|
+
|
|
315
|
+
# Run code quality tools
|
|
316
|
+
black src/
|
|
317
|
+
isort src/
|
|
318
|
+
mypy src/
|
|
319
|
+
|
|
320
|
+
# Run tests
|
|
321
|
+
pytest tests/
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Adding New Features
|
|
325
|
+
1. **Follow existing patterns**: Use consistent logging and error handling
|
|
326
|
+
2. **Add tests**: Include tests for new functionality
|
|
327
|
+
3. **Update documentation**: Add examples and update README files
|
|
328
|
+
4. **Maintain compatibility**: Ensure changes don't break existing functionality
|
|
329
|
+
|
|
330
|
+
### Submitting Pull Requests
|
|
331
|
+
1. Fork the repository
|
|
332
|
+
2. Create a feature branch
|
|
333
|
+
3. Make your changes with tests
|
|
334
|
+
4. Ensure all tests pass
|
|
335
|
+
5. Submit a pull request with a clear description
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
## 📄 Citation
|
|
339
|
+
|
|
340
|
+
If you use PRKit in your research, please cite it as follows:
|
|
341
|
+
|
|
342
|
+
**BibTeX:**
|
|
343
|
+
```bibtex
|
|
344
|
+
@software{zhang2026physicalreasoningtoolkit,
|
|
345
|
+
author = {Zhang, Yinghuan},
|
|
346
|
+
title = {Physical Reasoning Toolkit},
|
|
347
|
+
year = {2026},
|
|
348
|
+
license = {MIT},
|
|
349
|
+
url = {https://github.com/sherryzyh/physical_reasoning_toolkit},
|
|
350
|
+
abstract = {A unified toolkit for researchers and engineers working on AI physical reasoning. PRKit provides a shared foundation for representing physics problems, running inference with multiple model providers, evaluating outputs with physics-aware comparators, and building structured annotation workflows.}
|
|
351
|
+
}
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
For citation files, see `CITATION.cff` and `CITATION.bib` in the repository root.
|
|
355
|
+
|
|
356
|
+
## 🙏 Acknowledgments
|
|
357
|
+
|
|
358
|
+
PRKit integrates and builds upon several excellent physics reasoning benchmarks and datasets. We thank the creators of:
|
|
359
|
+
- **PhysReason**, **PHYBench**, **UGPhysics**, **SeePhys**, **PhyX**, and other benchmark datasets
|
|
360
|
+
- The open-source community for their valuable contributions and feedback
|
|
361
|
+
|
|
362
|
+
**Note:** For detailed citations and references to the original dataset papers, please see the [Citations section](DATASETS.md#citations) in `DATASETS.md`.
|
|
363
|
+
|
|
364
|
+
## 📝 License
|
|
365
|
+
|
|
366
|
+
This project is licensed under the **MIT License** - see the [LICENSE](LICENSE) file for details.
|
|
367
|
+
|
|
368
|
+
---
|
|
369
|
+
|
|
370
|
+
**Ready to advance physics reasoning research! 🚀✨**
|
|
371
|
+
|
|
372
|
+
**Quick Links:** `pip install physical-reasoning-toolkit` | [GitHub](https://github.com/sherryzyh/physical_reasoning_toolkit) | [Documentation](https://github.com/sherryzyh/physical_reasoning_toolkit#readme) | [Issues](https://github.com/sherryzyh/physical_reasoning_toolkit/issues)
|