pxq 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pxq-0.1.0/.github/RELEASE.md +162 -0
- pxq-0.1.0/.github/workflows/publish.yml +33 -0
- pxq-0.1.0/.gitignore +12 -0
- pxq-0.1.0/.hive/issues.jsonl +15 -0
- pxq-0.1.0/.python-version +1 -0
- pxq-0.1.0/.tmp/design.md +144 -0
- pxq-0.1.0/.tmp/task.md +70 -0
- pxq-0.1.0/PKG-INFO +349 -0
- pxq-0.1.0/README.md +315 -0
- pxq-0.1.0/context/requirements.md +73 -0
- pxq-0.1.0/docs/GPUs.md +159 -0
- pxq-0.1.0/docs/cli-reference.md +244 -0
- pxq-0.1.0/docs/cli-reference_ja.md +246 -0
- pxq-0.1.0/examples/local/README.md +217 -0
- pxq-0.1.0/examples/local/job1_sleep_hello.py +27 -0
- pxq-0.1.0/examples/local/job2_sleep_hello.py +27 -0
- pxq-0.1.0/examples/local/run_two_jobs.sh +73 -0
- pxq-0.1.0/examples/runpod/README.md +408 -0
- pxq-0.1.0/examples/runpod/config/dataset-download.yaml +13 -0
- pxq-0.1.0/examples/runpod/config/torch-cuda-with-volume.yaml +13 -0
- pxq-0.1.0/examples/runpod/dataset_download.sh +8 -0
- pxq-0.1.0/examples/runpod/requirements.txt +2 -0
- pxq-0.1.0/examples/runpod/train_model.py +258 -0
- pxq-0.1.0/examples/runpod/train_on_cuda.sh +6 -0
- pxq-0.1.0/main.py +6 -0
- pxq-0.1.0/pyproject.toml +65 -0
- pxq-0.1.0/runpod_openapi.json +4536 -0
- pxq-0.1.0/src/pxq/__init__.py +3 -0
- pxq-0.1.0/src/pxq/__main__.py +6 -0
- pxq-0.1.0/src/pxq/api/__init__.py +15 -0
- pxq-0.1.0/src/pxq/api/health.py +17 -0
- pxq-0.1.0/src/pxq/api/jobs.py +544 -0
- pxq-0.1.0/src/pxq/cli.py +533 -0
- pxq-0.1.0/src/pxq/client.py +215 -0
- pxq-0.1.0/src/pxq/config.py +54 -0
- pxq-0.1.0/src/pxq/config_loader.py +149 -0
- pxq-0.1.0/src/pxq/dashboard/__init__.py +5 -0
- pxq-0.1.0/src/pxq/dashboard/routes.py +138 -0
- pxq-0.1.0/src/pxq/dashboard/templates/base.html +73 -0
- pxq-0.1.0/src/pxq/dashboard/templates/index.html +41 -0
- pxq-0.1.0/src/pxq/dashboard/templates/job_detail.html +124 -0
- pxq-0.1.0/src/pxq/dashboard/templates/partials/job_list.html +56 -0
- pxq-0.1.0/src/pxq/dashboard/templates/partials/job_logs.html +82 -0
- pxq-0.1.0/src/pxq/executor.py +436 -0
- pxq-0.1.0/src/pxq/log_collector.py +451 -0
- pxq-0.1.0/src/pxq/models.py +227 -0
- pxq-0.1.0/src/pxq/providers/__init__.py +3 -0
- pxq-0.1.0/src/pxq/providers/_runpod_client.py +786 -0
- pxq-0.1.0/src/pxq/providers/_runpod_exec.py +438 -0
- pxq-0.1.0/src/pxq/providers/local_exec.py +180 -0
- pxq-0.1.0/src/pxq/providers/runpod_client.py +647 -0
- pxq-0.1.0/src/pxq/providers/runpod_exec.py +854 -0
- pxq-0.1.0/src/pxq/providers/runpod_gpu_types.py +93 -0
- pxq-0.1.0/src/pxq/providers/runpod_provider.py +149 -0
- pxq-0.1.0/src/pxq/providers/runpod_ssh.py +143 -0
- pxq-0.1.0/src/pxq/recovery.py +91 -0
- pxq-0.1.0/src/pxq/scheduler.py +57 -0
- pxq-0.1.0/src/pxq/server.py +80 -0
- pxq-0.1.0/src/pxq/server_pid.py +302 -0
- pxq-0.1.0/src/pxq/storage.py +711 -0
- pxq-0.1.0/tests/__init__.py +1 -0
- pxq-0.1.0/tests/cli/__init__.py +1 -0
- pxq-0.1.0/tests/cli/test_add_ls_status.py +807 -0
- pxq-0.1.0/tests/cli/test_cancel_stop.py +348 -0
- pxq-0.1.0/tests/cli/test_cli_base.py +115 -0
- pxq-0.1.0/tests/cli/test_config_dir_flags.py +518 -0
- pxq-0.1.0/tests/cli/test_server_diagnostics.py +246 -0
- pxq-0.1.0/tests/cli/test_server_start.py +106 -0
- pxq-0.1.0/tests/cli/test_ssh_command.py +148 -0
- pxq-0.1.0/tests/conftest.py +127 -0
- pxq-0.1.0/tests/dashboard/__init__.py +1 -0
- pxq-0.1.0/tests/dashboard/test_dashboard_ui.py +725 -0
- pxq-0.1.0/tests/dashboard/test_polling_stability.py +382 -0
- pxq-0.1.0/tests/fixtures/config-mount-path.yaml +6 -0
- pxq-0.1.0/tests/fixtures/config-volume-path.yaml +6 -0
- pxq-0.1.0/tests/integration/__init__.py +1 -0
- pxq-0.1.0/tests/integration/test_jobs_api.py +1430 -0
- pxq-0.1.0/tests/integration/test_log_collection.py +497 -0
- pxq-0.1.0/tests/integration/test_managed_lifecycle.py +884 -0
- pxq-0.1.0/tests/integration/test_managed_stop.py +367 -0
- pxq-0.1.0/tests/integration/test_nonmanaged_runtime_regression.py +390 -0
- pxq-0.1.0/tests/integration/test_parallelism.py +137 -0
- pxq-0.1.0/tests/integration/test_provisioning_timeout.py +88 -0
- pxq-0.1.0/tests/integration/test_recovery.py +225 -0
- pxq-0.1.0/tests/integration/test_release_gate.py +325 -0
- pxq-0.1.0/tests/integration/test_stderr_verification.py +291 -0
- pxq-0.1.0/tests/integration/test_stop_regressions.py +259 -0
- pxq-0.1.0/tests/providers/test_runpod_exec.py +346 -0
- pxq-0.1.0/tests/providers/test_runpod_provider.py +65 -0
- pxq-0.1.0/tests/providers/test_runpod_ssh.py +219 -0
- pxq-0.1.0/tests/unit/__init__.py +1 -0
- pxq-0.1.0/tests/unit/test_config.py +63 -0
- pxq-0.1.0/tests/unit/test_executor.py +1601 -0
- pxq-0.1.0/tests/unit/test_pxqignore.py +97 -0
- pxq-0.1.0/tests/unit/test_runpod_client.py +481 -0
- pxq-0.1.0/tests/unit/test_runpod_gpu_types.py +66 -0
- pxq-0.1.0/tests/unit/test_sanity.py +21 -0
- pxq-0.1.0/tests/unit/test_server.py +47 -0
- pxq-0.1.0/tests/unit/test_server_pid.py +477 -0
- pxq-0.1.0/tests/unit/test_state_machine.py +592 -0
- pxq-0.1.0/uv.lock +658 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# PyPI Release Guide
|
|
2
|
+
|
|
3
|
+
This document describes how to publish `pxq` to PyPI.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
### 1. PyPI Account
|
|
8
|
+
|
|
9
|
+
Create a PyPI account at https://pypi.org/account/register/ if you don't have one.
|
|
10
|
+
|
|
11
|
+
### 2. Trusted Publisher Setup
|
|
12
|
+
|
|
13
|
+
Trusted Publisher allows publishing to PyPI without API tokens, using GitHub OIDC.
|
|
14
|
+
|
|
15
|
+
**Steps:**
|
|
16
|
+
|
|
17
|
+
1. Go to https://pypi.org/manage/account/publishing/
|
|
18
|
+
2. Click "Add a trusted publisher"
|
|
19
|
+
3. Select "GitHub Actions"
|
|
20
|
+
4. Fill in:
|
|
21
|
+
- **Project name**: `pxq`
|
|
22
|
+
- **Owner**: `takeru1205`
|
|
23
|
+
- **Repository name**: `pxq`
|
|
24
|
+
- **Workflow name**: `publish.yml`
|
|
25
|
+
- **Environment**: `pypi` (or leave blank for all environments)
|
|
26
|
+
5. Click "Add"
|
|
27
|
+
|
|
28
|
+
### 3. Create PyPI Project (First Time Only)
|
|
29
|
+
|
|
30
|
+
For the first release:
|
|
31
|
+
|
|
32
|
+
1. Go to https://pypi.org/project/pxq/
|
|
33
|
+
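2. If the project doesn't exist yet, no manual creation is needed: PyPI has no "create project" form, and the project `pxq` is created by the first successful upload (register the Trusted Publisher as a *pending publisher* for the name `pxq`)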
|
|
34
|
+
3. Add the Trusted Publisher you created above
|
|
35
|
+
|
|
36
|
+
## Release Process
|
|
37
|
+
|
|
38
|
+
### Step 1: Update Version
|
|
39
|
+
|
|
40
|
+
Update the version in `pyproject.toml`:
|
|
41
|
+
|
|
42
|
+
```toml
|
|
43
|
+
[project]
|
|
44
|
+
name = "pxq"
|
|
45
|
+
version = "0.1.0" # Update this
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
**Versioning scheme**: Follow [Semantic Versioning](https://semver.org/)
|
|
49
|
+
- `0.1.0` - Initial release
|
|
50
|
+
- `0.1.1` - Bug fix
|
|
51
|
+
- `0.2.0` - New feature
|
|
52
|
+
|
|
53
|
+
### Step 2: Commit Changes
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
git add .
|
|
57
|
+
git commit -m "Bump version to 0.1.0"
|
|
58
|
+
git push origin main
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Step 3: Create GitHub Release
|
|
62
|
+
|
|
63
|
+
**Via GitHub Web UI:**
|
|
64
|
+
|
|
65
|
+
1. Go to https://github.com/takeru1205/pxq/releases
|
|
66
|
+
2. Click "Draft a new release"
|
|
67
|
+
3. Fill in:
|
|
68
|
+
- **Tag version**: `v0.1.0` (match the version with `v` prefix)
|
|
69
|
+
- **Release title**: `v0.1.0`
|
|
70
|
+
- **Description**: Add release notes (see template below)
|
|
71
|
+
4. Click "Publish release"
|
|
72
|
+
|
|
73
|
+
**Via GitHub CLI:**
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
gh release create v0.1.0 \
|
|
77
|
+
--title "v0.1.0" \
|
|
78
|
+
--notes "Release notes here" \
|
|
79
|
+
--generate-notes
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Step 4: Automatic PyPI Publish
|
|
83
|
+
|
|
84
|
+
Once the release is published:
|
|
85
|
+
|
|
86
|
+
1. GitHub Actions workflow `.github/workflows/publish.yml` is triggered automatically
|
|
87
|
+
2. The workflow builds the package and publishes to PyPI
|
|
88
|
+
3. Monitor the action at: https://github.com/takeru1205/pxq/actions
|
|
89
|
+
|
|
90
|
+
### Step 5: Verify Publication
|
|
91
|
+
|
|
92
|
+
Check that the package is published:
|
|
93
|
+
|
|
94
|
+
- **PyPI**: https://pypi.org/project/pxq/
|
|
95
|
+
- **Installation test**:
|
|
96
|
+
```bash
|
|
97
|
+
pip install pxq
|
|
98
|
+
pxq --version
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Release Notes Template
|
|
102
|
+
|
|
103
|
+
```markdown
|
|
104
|
+
## What's Changed
|
|
105
|
+
|
|
106
|
+
### New Features
|
|
107
|
+
- Feature description
|
|
108
|
+
|
|
109
|
+
### Bug Fixes
|
|
110
|
+
- Fix description
|
|
111
|
+
|
|
112
|
+
### Improvements
|
|
113
|
+
- Improvement description
|
|
114
|
+
|
|
115
|
+
## Installation
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# From PyPI
|
|
119
|
+
pip install pxq
|
|
120
|
+
|
|
121
|
+
# Or with uv
|
|
122
|
+
uv tool install pxq
|
|
123
|
+
|
|
124
|
+
# From GitHub (latest)
|
|
125
|
+
uv tool install git+https://github.com/takeru1205/pxq.git
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
**Full Changelog**: https://github.com/takeru1205/pxq/compare/v0.0.0...v0.1.0
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Troubleshooting
|
|
132
|
+
|
|
133
|
+
### Workflow Fails
|
|
134
|
+
|
|
135
|
+
**Check the logs at**: https://github.com/takeru1205/pxq/actions
|
|
136
|
+
|
|
137
|
+
Common issues:
|
|
138
|
+
- **Trusted Publisher not configured**: Verify the publisher setup in PyPI
|
|
139
|
+
- **Version already exists**: Bump the version number
|
|
140
|
+
- **Build errors**: Check `uv build` output locally
|
|
141
|
+
|
|
142
|
+
### Package Name Already Taken
|
|
143
|
+
|
|
144
|
+
If `pxq` is already taken on PyPI:
|
|
145
|
+
- You cannot publish with the same name
|
|
146
|
+
- Consider a different name or contact the current owner
|
|
147
|
+
|
|
148
|
+
### Manual Publish (Fallback)
|
|
149
|
+
|
|
150
|
+
If Trusted Publisher fails, use an API token instead:
|
|
151
|
+
|
|
152
|
+
```yaml
|
|
153
|
+
# Add token as GitHub Secret: PYPI_TOKEN
|
|
154
|
+
# Then modify publish.yml to use:
|
|
155
|
+
- run: uv publish --token ${{ secrets.PYPI_TOKEN }}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Version History
|
|
159
|
+
|
|
160
|
+
| Version | Date | Notes |
|
|
161
|
+
|---------|------|-------|
|
|
162
|
+
| 0.1.0 | TBD | Initial release |
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write # Required for Trusted Publisher
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
pypi-publish:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
environment:
|
|
15
|
+
name: pypi
|
|
16
|
+
url: https://pypi.org/p/pxq
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Install uv
|
|
21
|
+
uses: astral-sh/setup-uv@v5
|
|
22
|
+
with:
|
|
23
|
+
enable-cache: true
|
|
24
|
+
cache-dependency-glob: "uv.lock"
|
|
25
|
+
|
|
26
|
+
- name: Set up Python
|
|
27
|
+
run: uv python install 3.11
|
|
28
|
+
|
|
29
|
+
- name: Build package
|
|
30
|
+
run: uv build
|
|
31
|
+
|
|
32
|
+
- name: Publish to PyPI
|
|
33
|
+
run: uv publish
|
pxq-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{"id":"cell-4qpa5z-mmgc5grykrf","title":"Allow dashboard users to view logs for completed jobs","description":"Enable pxqueue dashboard users to view actual log content (stdout/stderr) for completed jobs. Current system only stores artifact metadata (paths, sizes) without content, making logs inaccessible after pod termination.","status":"open","priority":1,"issue_type":"epic","created_at":"2026-03-07T13:04:50.734Z","updated_at":"2026-03-07T13:04:50.734Z","dependencies":[],"labels":[],"comments":[]}
|
|
2
|
+
{"id":"cell-4qpa5z-mmgc5gs5kvu","title":"Design log content storage schema and add content column to artifacts table","status":"open","priority":3,"issue_type":"task","created_at":"2026-03-07T13:04:50.741Z","updated_at":"2026-03-07T13:04:50.741Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
|
|
3
|
+
{"id":"cell-4qpa5z-mmgc5gs7b5p","title":"Update storage.py to persist actual log content bytes","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-07T13:04:50.743Z","updated_at":"2026-03-07T13:04:50.743Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
|
|
4
|
+
{"id":"cell-4qpa5z-mmgc5gs7tqe","title":"Add final log collection hook when job transitions to terminal state","status":"open","priority":3,"issue_type":"task","created_at":"2026-03-07T13:04:50.743Z","updated_at":"2026-03-07T13:04:50.743Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
|
|
5
|
+
{"id":"cell-4qpa5z-mmgc5gs8psl","title":"Modify log_collector.py to capture and pass content to create_artifact()","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-07T13:04:50.744Z","updated_at":"2026-03-07T13:04:50.744Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
|
|
6
|
+
{"id":"cell-4qpa5z-mmgc5gs9tg1","title":"Update dashboard templates to display actual log content","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-07T13:04:50.745Z","updated_at":"2026-03-07T13:04:50.745Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
|
|
7
|
+
{"id":"cell-4qpa5z-mmgc5gsalpq","title":"Add API endpoint for log content retrieval","status":"open","priority":1,"issue_type":"task","created_at":"2026-03-07T13:04:50.746Z","updated_at":"2026-03-07T13:04:50.746Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
|
|
8
|
+
{"id":"cell-4qpa5z-mmgc5gsblkl","title":"Add integration tests for completed-job log viewing","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-07T13:04:50.747Z","updated_at":"2026-03-07T13:04:50.747Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
|
|
9
|
+
{"id":"cell-4qpa5z-mmh9hngzu6t","title":"RunPod stderr verification flow + documentation","description":"Create verification flow for RunPod stderr capture that proves stderr content renders in the dashboard with proper cleanup. Includes: (1) integration test with --run-integration flag, (2) manual verification script, (3) documentation updates to examples/runpod/README.md, (4) DB and dashboard HTML verification commands.","status":"open","priority":1,"issue_type":"epic","created_at":"2026-03-08T04:38:06.611Z","updated_at":"2026-03-08T04:38:06.611Z","dependencies":[],"labels":[],"comments":[]}
|
|
10
|
+
{"id":"cell-4qpa5z-mmh9hnh6ch9","title":"Analyze test_output.py and verify it produces stderr","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-08T04:38:06.618Z","updated_at":"2026-03-08T04:38:06.618Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
|
|
11
|
+
{"id":"cell-4qpa5z-mmh9hnh99oz","title":"Research existing integration test patterns","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-08T04:38:06.621Z","updated_at":"2026-03-08T04:38:06.621Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
|
|
12
|
+
{"id":"cell-4qpa5z-mmh9hnhbtwm","title":"Create stderr verification integration test","status":"open","priority":1,"issue_type":"task","created_at":"2026-03-08T04:38:06.623Z","updated_at":"2026-03-08T04:38:06.623Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
|
|
13
|
+
{"id":"cell-4qpa5z-mmh9hnhg33c","title":"Create manual verification script","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-08T04:38:06.628Z","updated_at":"2026-03-08T04:38:06.628Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
|
|
14
|
+
{"id":"cell-4qpa5z-mmh9hnhgdfp","title":"Update RunPod README with stderr verification section","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-08T04:38:06.628Z","updated_at":"2026-03-08T04:38:06.628Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
|
|
15
|
+
{"id":"cell-4qpa5z-mmh9hnhhl5y","title":"Add DB and dashboard verification commands to test","status":"open","priority":1,"issue_type":"task","created_at":"2026-03-08T04:38:06.629Z","updated_at":"2026-03-08T04:38:06.629Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
pxq-0.1.0/.tmp/design.md
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# Server Identity and Stale PID Reconciliation Design
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
This document defines the canonical server identity verification logic for pxq server. The goal is to reliably detect the actual pxq server process even when the PID file contains a stale PID, and to guard against accidentally touching non-pxq processes.
|
|
6
|
+
|
|
7
|
+
## Problem Statement
|
|
8
|
+
|
|
9
|
+
### Current Issues
|
|
10
|
+
|
|
11
|
+
1. **Stale PID File**: The PID file (`~/.pxq/server.pid`) may contain a PID that no longer exists or belongs to a different process.
|
|
12
|
+
2. **No Identity Verification**: Current implementation only checks if a process with the PID exists via `os.kill(pid, 0)`, but does not verify if it's actually the pxq server.
|
|
13
|
+
3. **False Positives**: If another process happens to get the same PID, the current code incorrectly assumes it's the pxq server.
|
|
14
|
+
4. **No Port Ownership Check**: The current code doesn't verify if the process is actually listening on the expected server port.
|
|
15
|
+
|
|
16
|
+
### Known Issues from Context
|
|
17
|
+
|
|
18
|
+
- Live server (PID 28692) and PID file (dead PID 35336) were mismatched
|
|
19
|
+
- The live server's OpenAPI schema was missing the `/api/jobs/stop` and `/api/jobs/{job_id}/cancel` endpoints
|
|
20
|
+
- `pxq cancel 28` (provisioning) returned 404 because the server wasn't properly detected
|
|
21
|
+
|
|
22
|
+
## Solution Design
|
|
23
|
+
|
|
24
|
+
### Core Principles
|
|
25
|
+
|
|
26
|
+
1. **Port-based Detection**: The true pxq server is identified by the process listening on `server_host:server_port` (default 127.0.0.1:8765).
|
|
27
|
+
2. **Identity Verification**: Verify the process is actually pxq server by checking its command line contains `uvicorn pxq.server:app`.
|
|
28
|
+
3. **Stale PID Handling**: Automatically detect and clean up stale PID files.
|
|
29
|
+
4. **Safety Guard**: Never touch a process that doesn't match the pxq server identity.
|
|
30
|
+
|
|
31
|
+
### Function Definitions
|
|
32
|
+
|
|
33
|
+
#### `get_pxq_server_pid() -> Optional[int]`
|
|
34
|
+
|
|
35
|
+
Returns the PID of the actual pxq server process, ignoring stale PID files.
|
|
36
|
+
|
|
37
|
+
**Algorithm**:
|
|
38
|
+
1. Get the configured server port (default 8765)
|
|
39
|
+
2. Use `lsof -ti:{port}` to find the PID listening on that port
|
|
40
|
+
3. If no process is listening on the port, return None
|
|
41
|
+
4. Verify the process is the pxq server by checking that its cmdline contains one of the identity markers: `uvicorn pxq.server:app`, `pxq.server:app`, or `pxq.server:create_app`
|
|
42
|
+
5. If identity verified, return the PID
|
|
43
|
+
6. If identity not verified, return None (non-pxq process owns the port)
|
|
44
|
+
|
|
45
|
+
**Edge Cases**:
|
|
46
|
+
- No process listening on port → return None
|
|
47
|
+
- Multiple PIDs from lsof → use the first one (single instance design)
|
|
48
|
+
- Permission denied on lsof → fallback to reading PID file with identity check
|
|
49
|
+
- Non-pxq process owns the port → return None (safety guard)
|
|
50
|
+
|
|
51
|
+
#### `is_pxq_server_running() -> bool`
|
|
52
|
+
|
|
53
|
+
Returns True if the actual pxq server is running.
|
|
54
|
+
|
|
55
|
+
**Implementation**:
|
|
56
|
+
- Returns `get_pxq_server_pid() is not None`
|
|
57
|
+
|
|
58
|
+
#### `cleanup_stale_pid() -> bool`
|
|
59
|
+
|
|
60
|
+
Removes the stale PID file if it doesn't match the actual pxq server.
|
|
61
|
+
|
|
62
|
+
**Algorithm**:
|
|
63
|
+
1. Get the actual pxq server PID via `get_pxq_server_pid()`
|
|
64
|
+
2. Read the PID file via `read_pid()`
|
|
65
|
+
3. If PID file doesn't exist, return False (nothing to clean)
|
|
66
|
+
4. If actual server PID matches PID file, return False (not stale)
|
|
67
|
+
5. If actual server PID is None (no server) or different from PID file:
|
|
68
|
+
- Delete the PID file
|
|
69
|
+
- Return True (cleaned up stale file)
|
|
70
|
+
|
|
71
|
+
### Identity Verification Details
|
|
72
|
+
|
|
73
|
+
**Process Identity Check**:
|
|
74
|
+
- Read `/proc/{pid}/cmdline` (Linux; arguments are NUL-separated, so replace `\0` with spaces before matching) or use the `ps` command (macOS)
|
|
75
|
+
- Check if cmdline contains any of:
|
|
76
|
+
- `uvicorn pxq.server:app`
|
|
77
|
+
- `pxq.server:app`
|
|
78
|
+
- `pxq.server:create_app`
|
|
79
|
+
|
|
80
|
+
**Port Ownership Check**:
|
|
81
|
+
- Use `lsof -ti:{port}` to get PID listening on the port
|
|
82
|
+
- This is more reliable than trusting the PID file
|
|
83
|
+
|
|
84
|
+
### Platform Support
|
|
85
|
+
|
|
86
|
+
- **macOS**: Use `lsof -ti:{port}` and `ps -p {pid} -o command=`
|
|
87
|
+
- **Linux**: Use `lsof -ti:{port}` and read `/proc/{pid}/cmdline`
|
|
88
|
+
|
|
89
|
+
### Error Handling
|
|
90
|
+
|
|
91
|
+
1. **lsof not available**: Fallback to PID file check with identity verification
|
|
92
|
+
2. **Permission denied**: Handle gracefully, return None
|
|
93
|
+
3. **Process exits between checks**: Return None
|
|
94
|
+
|
|
95
|
+
## Testing Strategy
|
|
96
|
+
|
|
97
|
+
### Unit Tests
|
|
98
|
+
|
|
99
|
+
1. **Stale PID reconciliation happy path**:
|
|
100
|
+
- PID file contains dead PID
|
|
101
|
+
- Actual pxq server running on port
|
|
102
|
+
- `get_pxq_server_pid()` returns actual server PID
|
|
103
|
+
- `cleanup_stale_pid()` removes stale PID file
|
|
104
|
+
|
|
105
|
+
2. **Non-pxq listener failure guard**:
|
|
106
|
+
- Non-pxq process listening on port 8765
|
|
107
|
+
- `get_pxq_server_pid()` returns None
|
|
108
|
+
- `is_pxq_server_running()` returns False
|
|
109
|
+
|
|
110
|
+
3. **Normal operation**:
|
|
111
|
+
- pxq server running, PID file correct
|
|
112
|
+
- All functions work correctly
|
|
113
|
+
|
|
114
|
+
4. **No server running**:
|
|
115
|
+
- No process on port, no PID file
|
|
116
|
+
- All functions return None/False
|
|
117
|
+
|
|
118
|
+
### Test Commands
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# Scenario 1: stale PID reconciliation happy path
|
|
122
|
+
uv run pytest tests/unit -k "server_pid or stale_pid or listener" -q | tee .sisyphus/evidence/task-1-server-identity.txt
|
|
123
|
+
|
|
124
|
+
# Scenario 2: non-pxq listener failure guard
|
|
125
|
+
uv run pytest tests/unit -k "non_pxq_listener or foreign_process" -q | tee .sisyphus/evidence/task-1-server-identity-error.txt
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Dependencies
|
|
129
|
+
|
|
130
|
+
- This task is the Wave 1 foundation task
|
|
131
|
+
- Tasks 2, 3, and 4 depend on this task
|
|
132
|
+
- No external dependencies
|
|
133
|
+
|
|
134
|
+
## Files to Modify
|
|
135
|
+
|
|
136
|
+
- `src/pxq/server_pid.py` - Add new functions
|
|
137
|
+
- `tests/unit/test_server_pid.py` - Add new test cases
|
|
138
|
+
- `src/pxq/cli.py` - Update to use new functions (if needed)
|
|
139
|
+
|
|
140
|
+
## Backward Compatibility
|
|
141
|
+
|
|
142
|
+
- Existing functions (`get_server_pid()`, `is_server_running()`) remain unchanged
|
|
143
|
+
- New functions are additions, not replacements
|
|
144
|
+
- CLI commands continue to work with existing functions initially
|
pxq-0.1.0/.tmp/task.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Task List: Server Identity and Stale PID Reconciliation
|
|
2
|
+
|
|
3
|
+
## Wave 1: Foundation Task
|
|
4
|
+
|
|
5
|
+
### Task 1: Define canonical server identity and stale-state reconciliation
|
|
6
|
+
|
|
7
|
+
**Status**: In Progress
|
|
8
|
+
|
|
9
|
+
**Description**:
|
|
10
|
+
Define the canonical server identity verification logic. Add functions to `src/pxq/server_pid.py` to:
|
|
11
|
+
- Detect actual pxq server process by port ownership and cmdline identity
|
|
12
|
+
- Clean up stale PID files automatically
|
|
13
|
+
- Guard against touching non-pxq processes
|
|
14
|
+
|
|
15
|
+
**Subtasks**:
|
|
16
|
+
|
|
17
|
+
- [x] Create design document (.tmp/design.md)
|
|
18
|
+
- [ ] Create task list (.tmp/task.md)
|
|
19
|
+
- [ ] Implement `get_pxq_server_pid()` function
|
|
20
|
+
- Use `lsof -ti:{port}` to find process listening on server port
|
|
21
|
+
- Verify process identity via cmdline check
|
|
22
|
+
- Return None if no pxq server found or non-pxq process owns port
|
|
23
|
+
- [ ] Implement `is_pxq_server_running()` function
|
|
24
|
+
- Wrapper around `get_pxq_server_pid()`
|
|
25
|
+
- [ ] Implement `cleanup_stale_pid()` function
|
|
26
|
+
- Compare PID file with actual server PID
|
|
27
|
+
- Remove stale PID file
|
|
28
|
+
- [ ] Add unit tests for new functions
|
|
29
|
+
- Test stale PID reconciliation happy path
|
|
30
|
+
- Test non-pxq listener failure guard
|
|
31
|
+
- Test normal operation
|
|
32
|
+
- Test no server running
|
|
33
|
+
- [ ] Run pytest and verify all tests pass
|
|
34
|
+
- [ ] Save pytest output to .sisyphus/evidence/
|
|
35
|
+
- [ ] Create learnings document
|
|
36
|
+
|
|
37
|
+
**Dependencies**: None (foundation task)
|
|
38
|
+
|
|
39
|
+
**Dependent Tasks**:
|
|
40
|
+
- Task 2: TBD
|
|
41
|
+
- Task 3: TBD
|
|
42
|
+
- Task 4: TBD
|
|
43
|
+
|
|
44
|
+
**Files to Modify**:
|
|
45
|
+
- `src/pxq/server_pid.py` - Add new functions
|
|
46
|
+
- `tests/unit/test_server_pid.py` - Add test cases
|
|
47
|
+
|
|
48
|
+
**Test Commands**:
|
|
49
|
+
```bash
|
|
50
|
+
# Run all server_pid tests
|
|
51
|
+
uv run pytest tests/unit/test_server_pid.py -v
|
|
52
|
+
|
|
53
|
+
# Run specific test categories
|
|
54
|
+
uv run pytest tests/unit -k "server_pid or stale_pid or listener" -q
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Acceptance Criteria**:
|
|
58
|
+
- [ ] `get_pxq_server_pid()` returns actual pxq server PID (not stale PID)
|
|
59
|
+
- [ ] `get_pxq_server_pid()` returns None for non-pxq process on port
|
|
60
|
+
- [ ] `is_pxq_server_running()` accurately reflects pxq server status
|
|
61
|
+
- [ ] `cleanup_stale_pid()` removes stale PID files
|
|
62
|
+
- [ ] All existing tests continue to pass (no regression)
|
|
63
|
+
- [ ] New tests cover all scenarios
|
|
64
|
+
- [ ] pytest output saved to .sisyphus/evidence/
|
|
65
|
+
|
|
66
|
+
## Notes
|
|
67
|
+
|
|
68
|
+
- Platform: macOS (primary), Linux (secondary)
|
|
69
|
+
- Default server port: 8765
|
|
70
|
+
- Server identity: process running `uvicorn pxq.server:app`
|