pxq 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. pxq-0.1.0/.github/RELEASE.md +162 -0
  2. pxq-0.1.0/.github/workflows/publish.yml +33 -0
  3. pxq-0.1.0/.gitignore +12 -0
  4. pxq-0.1.0/.hive/issues.jsonl +15 -0
  5. pxq-0.1.0/.python-version +1 -0
  6. pxq-0.1.0/.tmp/design.md +144 -0
  7. pxq-0.1.0/.tmp/task.md +70 -0
  8. pxq-0.1.0/PKG-INFO +349 -0
  9. pxq-0.1.0/README.md +315 -0
  10. pxq-0.1.0/context/requirements.md +73 -0
  11. pxq-0.1.0/docs/GPUs.md +159 -0
  12. pxq-0.1.0/docs/cli-reference.md +244 -0
  13. pxq-0.1.0/docs/cli-reference_ja.md +246 -0
  14. pxq-0.1.0/examples/local/README.md +217 -0
  15. pxq-0.1.0/examples/local/job1_sleep_hello.py +27 -0
  16. pxq-0.1.0/examples/local/job2_sleep_hello.py +27 -0
  17. pxq-0.1.0/examples/local/run_two_jobs.sh +73 -0
  18. pxq-0.1.0/examples/runpod/README.md +408 -0
  19. pxq-0.1.0/examples/runpod/config/dataset-download.yaml +13 -0
  20. pxq-0.1.0/examples/runpod/config/torch-cuda-with-volume.yaml +13 -0
  21. pxq-0.1.0/examples/runpod/dataset_download.sh +8 -0
  22. pxq-0.1.0/examples/runpod/requirements.txt +2 -0
  23. pxq-0.1.0/examples/runpod/train_model.py +258 -0
  24. pxq-0.1.0/examples/runpod/train_on_cuda.sh +6 -0
  25. pxq-0.1.0/main.py +6 -0
  26. pxq-0.1.0/pyproject.toml +65 -0
  27. pxq-0.1.0/runpod_openapi.json +4536 -0
  28. pxq-0.1.0/src/pxq/__init__.py +3 -0
  29. pxq-0.1.0/src/pxq/__main__.py +6 -0
  30. pxq-0.1.0/src/pxq/api/__init__.py +15 -0
  31. pxq-0.1.0/src/pxq/api/health.py +17 -0
  32. pxq-0.1.0/src/pxq/api/jobs.py +544 -0
  33. pxq-0.1.0/src/pxq/cli.py +533 -0
  34. pxq-0.1.0/src/pxq/client.py +215 -0
  35. pxq-0.1.0/src/pxq/config.py +54 -0
  36. pxq-0.1.0/src/pxq/config_loader.py +149 -0
  37. pxq-0.1.0/src/pxq/dashboard/__init__.py +5 -0
  38. pxq-0.1.0/src/pxq/dashboard/routes.py +138 -0
  39. pxq-0.1.0/src/pxq/dashboard/templates/base.html +73 -0
  40. pxq-0.1.0/src/pxq/dashboard/templates/index.html +41 -0
  41. pxq-0.1.0/src/pxq/dashboard/templates/job_detail.html +124 -0
  42. pxq-0.1.0/src/pxq/dashboard/templates/partials/job_list.html +56 -0
  43. pxq-0.1.0/src/pxq/dashboard/templates/partials/job_logs.html +82 -0
  44. pxq-0.1.0/src/pxq/executor.py +436 -0
  45. pxq-0.1.0/src/pxq/log_collector.py +451 -0
  46. pxq-0.1.0/src/pxq/models.py +227 -0
  47. pxq-0.1.0/src/pxq/providers/__init__.py +3 -0
  48. pxq-0.1.0/src/pxq/providers/_runpod_client.py +786 -0
  49. pxq-0.1.0/src/pxq/providers/_runpod_exec.py +438 -0
  50. pxq-0.1.0/src/pxq/providers/local_exec.py +180 -0
  51. pxq-0.1.0/src/pxq/providers/runpod_client.py +647 -0
  52. pxq-0.1.0/src/pxq/providers/runpod_exec.py +854 -0
  53. pxq-0.1.0/src/pxq/providers/runpod_gpu_types.py +93 -0
  54. pxq-0.1.0/src/pxq/providers/runpod_provider.py +149 -0
  55. pxq-0.1.0/src/pxq/providers/runpod_ssh.py +143 -0
  56. pxq-0.1.0/src/pxq/recovery.py +91 -0
  57. pxq-0.1.0/src/pxq/scheduler.py +57 -0
  58. pxq-0.1.0/src/pxq/server.py +80 -0
  59. pxq-0.1.0/src/pxq/server_pid.py +302 -0
  60. pxq-0.1.0/src/pxq/storage.py +711 -0
  61. pxq-0.1.0/tests/__init__.py +1 -0
  62. pxq-0.1.0/tests/cli/__init__.py +1 -0
  63. pxq-0.1.0/tests/cli/test_add_ls_status.py +807 -0
  64. pxq-0.1.0/tests/cli/test_cancel_stop.py +348 -0
  65. pxq-0.1.0/tests/cli/test_cli_base.py +115 -0
  66. pxq-0.1.0/tests/cli/test_config_dir_flags.py +518 -0
  67. pxq-0.1.0/tests/cli/test_server_diagnostics.py +246 -0
  68. pxq-0.1.0/tests/cli/test_server_start.py +106 -0
  69. pxq-0.1.0/tests/cli/test_ssh_command.py +148 -0
  70. pxq-0.1.0/tests/conftest.py +127 -0
  71. pxq-0.1.0/tests/dashboard/__init__.py +1 -0
  72. pxq-0.1.0/tests/dashboard/test_dashboard_ui.py +725 -0
  73. pxq-0.1.0/tests/dashboard/test_polling_stability.py +382 -0
  74. pxq-0.1.0/tests/fixtures/config-mount-path.yaml +6 -0
  75. pxq-0.1.0/tests/fixtures/config-volume-path.yaml +6 -0
  76. pxq-0.1.0/tests/integration/__init__.py +1 -0
  77. pxq-0.1.0/tests/integration/test_jobs_api.py +1430 -0
  78. pxq-0.1.0/tests/integration/test_log_collection.py +497 -0
  79. pxq-0.1.0/tests/integration/test_managed_lifecycle.py +884 -0
  80. pxq-0.1.0/tests/integration/test_managed_stop.py +367 -0
  81. pxq-0.1.0/tests/integration/test_nonmanaged_runtime_regression.py +390 -0
  82. pxq-0.1.0/tests/integration/test_parallelism.py +137 -0
  83. pxq-0.1.0/tests/integration/test_provisioning_timeout.py +88 -0
  84. pxq-0.1.0/tests/integration/test_recovery.py +225 -0
  85. pxq-0.1.0/tests/integration/test_release_gate.py +325 -0
  86. pxq-0.1.0/tests/integration/test_stderr_verification.py +291 -0
  87. pxq-0.1.0/tests/integration/test_stop_regressions.py +259 -0
  88. pxq-0.1.0/tests/providers/test_runpod_exec.py +346 -0
  89. pxq-0.1.0/tests/providers/test_runpod_provider.py +65 -0
  90. pxq-0.1.0/tests/providers/test_runpod_ssh.py +219 -0
  91. pxq-0.1.0/tests/unit/__init__.py +1 -0
  92. pxq-0.1.0/tests/unit/test_config.py +63 -0
  93. pxq-0.1.0/tests/unit/test_executor.py +1601 -0
  94. pxq-0.1.0/tests/unit/test_pxqignore.py +97 -0
  95. pxq-0.1.0/tests/unit/test_runpod_client.py +481 -0
  96. pxq-0.1.0/tests/unit/test_runpod_gpu_types.py +66 -0
  97. pxq-0.1.0/tests/unit/test_sanity.py +21 -0
  98. pxq-0.1.0/tests/unit/test_server.py +47 -0
  99. pxq-0.1.0/tests/unit/test_server_pid.py +477 -0
  100. pxq-0.1.0/tests/unit/test_state_machine.py +592 -0
  101. pxq-0.1.0/uv.lock +658 -0
@@ -0,0 +1,162 @@
1
+ # PyPI Release Guide
2
+
3
+ This document describes how to publish `pxq` to PyPI.
4
+
5
+ ## Prerequisites
6
+
7
+ ### 1. PyPI Account
8
+
9
+ Create a PyPI account at https://pypi.org/account/ if you don't have one.
10
+
11
+ ### 2. Trusted Publisher Setup
12
+
13
+ Trusted Publisher allows publishing to PyPI without API tokens, using GitHub OIDC.
14
+
15
+ **Steps:**
16
+
17
+ 1. Go to https://pypi.org/manage/account/publishing/
18
+ 2. Click "Add a trusted publisher"
19
+ 3. Select "GitHub Actions"
20
+ 4. Fill in:
21
+ - **Project name**: `pxq`
22
+ - **Owner**: `takeru1205`
23
+ - **Repository name**: `pxq`
24
+ - **Workflow name**: `publish.yml`
25
+ - **Environment**: `pypi` (or leave blank for all environments)
26
+ 5. Click "Add"
27
+
28
+ ### 3. Create PyPI Project (First Time Only)
29
+
30
+ For the first release:
31
+
32
+ 1. Go to https://pypi.org/project/pxq/
33
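+ 2. If the project doesn't exist yet, no manual creation is needed: PyPI creates the project `pxq` automatically on the first successful upload (register the Trusted Publisher as a "pending publisher" beforehand)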
34
+ 3. Add the Trusted Publisher you created above
35
+
36
+ ## Release Process
37
+
38
+ ### Step 1: Update Version
39
+
40
+ Update the version in `pyproject.toml`:
41
+
42
+ ```toml
43
+ [project]
44
+ name = "pxq"
45
+ version = "0.1.0" # Update this
46
+ ```
47
+
48
+ **Versioning scheme**: Follow [Semantic Versioning](https://semver.org/)
49
+ - `0.1.0` - Initial release
50
+ - `0.1.1` - Bug fix
51
+ - `0.2.0` - New feature
52
+
53
+ ### Step 2: Commit Changes
54
+
55
+ ```bash
56
+ git add .
57
+ git commit -m "Bump version to 0.1.0"
58
+ git push origin main
59
+ ```
60
+
61
+ ### Step 3: Create GitHub Release
62
+
63
+ **Via GitHub Web UI:**
64
+
65
+ 1. Go to https://github.com/takeru1205/pxq/releases
66
+ 2. Click "Draft a new release"
67
+ 3. Fill in:
68
+ - **Tag version**: `v0.1.0` (match the version with `v` prefix)
69
+ - **Release title**: `v0.1.0`
70
+ - **Description**: Add release notes (see template below)
71
+ 4. Click "Publish release"
72
+
73
+ **Via GitHub CLI:**
74
+
75
+ ```bash
76
+ gh release create v0.1.0 \
77
+ --title "v0.1.0" \
78
+ --notes "Release notes here" \
79
+ --generate-notes
80
+ ```
81
+
82
+ ### Step 4: Automatic PyPI Publish
83
+
84
+ Once the release is published:
85
+
86
+ 1. GitHub Actions workflow `.github/workflows/publish.yml` is triggered automatically
87
+ 2. The workflow builds the package and publishes to PyPI
88
+ 3. Monitor the action at: https://github.com/takeru1205/pxq/actions
89
+
90
+ ### Step 5: Verify Publication
91
+
92
+ Check that the package is published:
93
+
94
+ - **PyPI**: https://pypi.org/project/pxq/
95
+ - **Installation test**:
96
+ ```bash
97
+ pip install pxq
98
+ pxq --version
99
+ ```
100
+
101
+ ## Release Notes Template
102
+
103
+ ````markdown
104
+ ## What's Changed
105
+
106
+ ### New Features
107
+ - Feature description
108
+
109
+ ### Bug Fixes
110
+ - Fix description
111
+
112
+ ### Improvements
113
+ - Improvement description
114
+
115
+ ## Installation
116
+
117
+ ```bash
118
+ # From PyPI
119
+ pip install pxq
120
+
121
+ # Or with uv
122
+ uv tool install pxq
123
+
124
+ # From GitHub (latest)
125
+ uv tool install git+https://github.com/takeru1205/pxq.git
126
+ ```
127
+
128
+ **Full Changelog**: https://github.com/takeru1205/pxq/compare/v0.0.0...v0.1.0
129
+ ````
130
+
131
+ ## Troubleshooting
132
+
133
+ ### Workflow Fails
134
+
135
+ **Check the logs at**: https://github.com/takeru1205/pxq/actions
136
+
137
+ Common issues:
138
+ - **Trusted Publisher not configured**: Verify the publisher setup in PyPI
139
+ - **Version already exists**: Bump the version number
140
+ - **Build errors**: Check `uv build` output locally
141
+
142
+ ### Package Name Already Taken
143
+
144
+ If `pxq` is already taken on PyPI:
145
+ - You cannot publish with the same name
146
+ - Consider a different name or contact the current owner
147
+
148
+ ### Manual Publish (Fallback)
149
+
150
+ If Trusted Publisher fails, use an API token:
151
+
152
+ ```yaml
153
+ # Add token as GitHub Secret: PYPI_TOKEN
154
+ # Then modify publish.yml to use:
155
+ - run: uv publish --token ${{ secrets.PYPI_TOKEN }}
156
+ ```
157
+
158
+ ## Version History
159
+
160
+ | Version | Date | Notes |
161
+ |---------|------|-------|
162
+ | 0.1.0 | TBD | Initial release |
@@ -0,0 +1,33 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write # Required for Trusted Publisher
9
+ contents: read
10
+
11
+ jobs:
12
+ pypi-publish:
13
+ runs-on: ubuntu-latest
14
+ environment:
15
+ name: pypi
16
+ url: https://pypi.org/p/pxq
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v5
22
+ with:
23
+ enable-cache: true
24
+ cache-dependency-glob: "uv.lock"
25
+
26
+ - name: Set up Python
27
+ run: uv python install 3.11
28
+
29
+ - name: Build package
30
+ run: uv build
31
+
32
+ - name: Publish to PyPI
33
+ run: uv publish
pxq-0.1.0/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ .sisyphus/
@@ -0,0 +1,15 @@
1
+ {"id":"cell-4qpa5z-mmgc5grykrf","title":"Allow dashboard users to view logs for completed jobs","description":"Enable pxqueue dashboard users to view actual log content (stdout/stderr) for completed jobs. Current system only stores artifact metadata (paths, sizes) without content, making logs inaccessible after pod termination.","status":"open","priority":1,"issue_type":"epic","created_at":"2026-03-07T13:04:50.734Z","updated_at":"2026-03-07T13:04:50.734Z","dependencies":[],"labels":[],"comments":[]}
2
+ {"id":"cell-4qpa5z-mmgc5gs5kvu","title":"Design log content storage schema and add content column to artifacts table","status":"open","priority":3,"issue_type":"task","created_at":"2026-03-07T13:04:50.741Z","updated_at":"2026-03-07T13:04:50.741Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
3
+ {"id":"cell-4qpa5z-mmgc5gs7b5p","title":"Update storage.py to persist actual log content bytes","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-07T13:04:50.743Z","updated_at":"2026-03-07T13:04:50.743Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
4
+ {"id":"cell-4qpa5z-mmgc5gs7tqe","title":"Add final log collection hook when job transitions to terminal state","status":"open","priority":3,"issue_type":"task","created_at":"2026-03-07T13:04:50.743Z","updated_at":"2026-03-07T13:04:50.743Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
5
+ {"id":"cell-4qpa5z-mmgc5gs8psl","title":"Modify log_collector.py to capture and pass content to create_artifact()","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-07T13:04:50.744Z","updated_at":"2026-03-07T13:04:50.744Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
6
+ {"id":"cell-4qpa5z-mmgc5gs9tg1","title":"Update dashboard templates to display actual log content","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-07T13:04:50.745Z","updated_at":"2026-03-07T13:04:50.745Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
7
+ {"id":"cell-4qpa5z-mmgc5gsalpq","title":"Add API endpoint for log content retrieval","status":"open","priority":1,"issue_type":"task","created_at":"2026-03-07T13:04:50.746Z","updated_at":"2026-03-07T13:04:50.746Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
8
+ {"id":"cell-4qpa5z-mmgc5gsblkl","title":"Add integration tests for completed-job log viewing","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-07T13:04:50.747Z","updated_at":"2026-03-07T13:04:50.747Z","parent_id":"cell-4qpa5z-mmgc5grykrf","dependencies":[],"labels":[],"comments":[]}
9
+ {"id":"cell-4qpa5z-mmh9hngzu6t","title":"RunPod stderr verification flow + documentation","description":"Create verification flow for RunPod stderr capture that proves stderr content renders in the dashboard with proper cleanup. Includes: (1) integration test with --run-integration flag, (2) manual verification script, (3) documentation updates to examples/runpod/README.md, (4) DB and dashboard HTML verification commands.","status":"open","priority":1,"issue_type":"epic","created_at":"2026-03-08T04:38:06.611Z","updated_at":"2026-03-08T04:38:06.611Z","dependencies":[],"labels":[],"comments":[]}
10
+ {"id":"cell-4qpa5z-mmh9hnh6ch9","title":"Analyze test_output.py and verify it produces stderr","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-08T04:38:06.618Z","updated_at":"2026-03-08T04:38:06.618Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
11
+ {"id":"cell-4qpa5z-mmh9hnh99oz","title":"Research existing integration test patterns","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-08T04:38:06.621Z","updated_at":"2026-03-08T04:38:06.621Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
12
+ {"id":"cell-4qpa5z-mmh9hnhbtwm","title":"Create stderr verification integration test","status":"open","priority":1,"issue_type":"task","created_at":"2026-03-08T04:38:06.623Z","updated_at":"2026-03-08T04:38:06.623Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
13
+ {"id":"cell-4qpa5z-mmh9hnhg33c","title":"Create manual verification script","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-08T04:38:06.628Z","updated_at":"2026-03-08T04:38:06.628Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
14
+ {"id":"cell-4qpa5z-mmh9hnhgdfp","title":"Update RunPod README with stderr verification section","status":"open","priority":2,"issue_type":"task","created_at":"2026-03-08T04:38:06.628Z","updated_at":"2026-03-08T04:38:06.628Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
15
+ {"id":"cell-4qpa5z-mmh9hnhhl5y","title":"Add DB and dashboard verification commands to test","status":"open","priority":1,"issue_type":"task","created_at":"2026-03-08T04:38:06.629Z","updated_at":"2026-03-08T04:38:06.629Z","parent_id":"cell-4qpa5z-mmh9hngzu6t","dependencies":[],"labels":[],"comments":[]}
@@ -0,0 +1 @@
1
+ 3.13
@@ -0,0 +1,144 @@
1
+ # Server Identity and Stale PID Reconciliation Design
2
+
3
+ ## Overview
4
+
5
+ This document defines the canonical server identity verification logic for pxq server. The goal is to reliably detect the actual pxq server process even when the PID file contains a stale PID, and to guard against accidentally touching non-pxq processes.
6
+
7
+ ## Problem Statement
8
+
9
+ ### Current Issues
10
+
11
+ 1. **Stale PID File**: The PID file (`~/.pxq/server.pid`) may contain a PID that no longer exists or belongs to a different process.
12
+ 2. **No Identity Verification**: Current implementation only checks if a process with the PID exists via `os.kill(pid, 0)`, but does not verify if it's actually the pxq server.
13
+ 3. **False Positives**: If another process happens to get the same PID, the current code incorrectly assumes it's the pxq server.
14
+ 4. **No Port Ownership Check**: The current code doesn't verify if the process is actually listening on the expected server port.
15
+
16
+ ### Known Issues from Context
17
+
18
+ - Live server (PID 28692) and PID file (dead PID 35336) were mismatched
19
+ - Live server's openapi was missing `/api/jobs/stop` and `/api/jobs/{job_id}/cancel` endpoints
20
+ - `pxq cancel 28` (provisioning) returned 404 because the server wasn't properly detected
21
+
22
+ ## Solution Design
23
+
24
+ ### Core Principles
25
+
26
+ 1. **Port-based Detection**: The true pxq server is identified by the process listening on `server_host:server_port` (default 127.0.0.1:8765).
27
+ 2. **Identity Verification**: Verify the process is actually pxq server by checking its command line contains `uvicorn pxq.server:app`.
28
+ 3. **Stale PID Handling**: Automatically detect and clean up stale PID files.
29
+ 4. **Safety Guard**: Never touch a process that doesn't match the pxq server identity.
30
+
31
+ ### Function Definitions
32
+
33
+ #### `get_pxq_server_pid() -> Optional[int]`
34
+
35
+ Returns the PID of the actual pxq server process, ignoring stale PID files.
36
+
37
+ **Algorithm**:
38
+ 1. Get the configured server port (default 8765)
39
+ 2. Use `lsof -ti:{port}` to find the PID listening on that port
40
+ 3. If no process is listening on the port, return None
41
+ 4. Verify the process is pxq server by checking cmdline contains `uvicorn pxq.server:app` or `pxq.server:app`
42
+ 5. If identity verified, return the PID
43
+ 6. If identity not verified, return None (non-pxq process owns the port)
44
+
45
+ **Edge Cases**:
46
+ - No process listening on port → return None
47
+ - Multiple PIDs from lsof → use the first one (single instance design)
48
+ - Permission denied on lsof → fallback to reading PID file with identity check
49
+ - Non-pxq process owns the port → return None (safety guard)
50
+
51
+ #### `is_pxq_server_running() -> bool`
52
+
53
+ Returns True if the actual pxq server is running.
54
+
55
+ **Implementation**:
56
+ - Returns `get_pxq_server_pid() is not None`
57
+
58
+ #### `cleanup_stale_pid() -> bool`
59
+
60
+ Removes the stale PID file if it doesn't match the actual pxq server.
61
+
62
+ **Algorithm**:
63
+ 1. Get the actual pxq server PID via `get_pxq_server_pid()`
64
+ 2. Read the PID file via `read_pid()`
65
+ 3. If PID file doesn't exist, return False (nothing to clean)
66
+ 4. If actual server PID matches PID file, return False (not stale)
67
+ 5. If actual server PID is None (no server) or different from PID file:
68
+ - Delete the PID file
69
+ - Return True (cleaned up stale file)
70
+
71
+ ### Identity Verification Details
72
+
73
+ **Process Identity Check**:
74
+ - Read `/proc/{pid}/cmdline` (Linux) or use `ps` command (macOS)
75
+ - Check if cmdline contains any of:
76
+ - `uvicorn pxq.server:app`
77
+ - `pxq.server:app`
78
+ - `pxq.server:create_app`
79
+
80
+ **Port Ownership Check**:
81
+ - Use `lsof -ti:{port}` to get PID listening on the port
82
+ - This is more reliable than trusting the PID file
83
+
84
+ ### Platform Support
85
+
86
+ - **macOS**: Use `lsof -ti:{port}` and `ps -p {pid} -o command=`
87
+ - **Linux**: Use `lsof -ti:{port}` and read `/proc/{pid}/cmdline`
88
+
89
+ ### Error Handling
90
+
91
+ 1. **lsof not available**: Fallback to PID file check with identity verification
92
+ 2. **Permission denied**: Handle gracefully, return None
93
+ 3. **Process exits between checks**: Return None
94
+
95
+ ## Testing Strategy
96
+
97
+ ### Unit Tests
98
+
99
+ 1. **Stale PID reconciliation happy path**:
100
+ - PID file contains dead PID
101
+ - Actual pxq server running on port
102
+ - `get_pxq_server_pid()` returns actual server PID
103
+ - `cleanup_stale_pid()` removes stale PID file
104
+
105
+ 2. **Non-pxq listener failure guard**:
106
+ - Non-pxq process listening on port 8765
107
+ - `get_pxq_server_pid()` returns None
108
+ - `is_pxq_server_running()` returns False
109
+
110
+ 3. **Normal operation**:
111
+ - pxq server running, PID file correct
112
+ - All functions work correctly
113
+
114
+ 4. **No server running**:
115
+ - No process on port, no PID file
116
+ - All functions return None/False
117
+
118
+ ### Test Commands
119
+
120
+ ```bash
121
+ # Scenario 1: stale PID reconciliation happy path
122
+ uv run pytest tests/unit -k "server_pid or stale_pid or listener" -q | tee .sisyphus/evidence/task-1-server-identity.txt
123
+
124
+ # Scenario 2: non-pxq listener failure guard
125
+ uv run pytest tests/unit -k "non_pxq_listener or foreign_process" -q | tee .sisyphus/evidence/task-1-server-identity-error.txt
126
+ ```
127
+
128
+ ## Dependencies
129
+
130
+ - This task is the Wave 1 foundation task
131
+ - Tasks 2, 3, and 4 depend on this task
132
+ - No external dependencies
133
+
134
+ ## Files to Modify
135
+
136
+ - `src/pxq/server_pid.py` - Add new functions
137
+ - `tests/unit/test_server_pid.py` - Add new test cases
138
+ - `src/pxq/cli.py` - Update to use new functions (if needed)
139
+
140
+ ## Backward Compatibility
141
+
142
+ - Existing functions (`get_server_pid()`, `is_server_running()`) remain unchanged
143
+ - New functions are additions, not replacements
144
+ - CLI commands continue to work with existing functions initially
pxq-0.1.0/.tmp/task.md ADDED
@@ -0,0 +1,70 @@
1
+ # Task List: Server Identity and Stale PID Reconciliation
2
+
3
+ ## Wave 1: Foundation Task
4
+
5
+ ### Task 1: Define canonical server identity and stale-state reconciliation
6
+
7
+ **Status**: In Progress
8
+
9
+ **Description**:
10
+ Define the canonical server identity verification logic. Add functions to `src/pxq/server_pid.py` to:
11
+ - Detect actual pxq server process by port ownership and cmdline identity
12
+ - Clean up stale PID files automatically
13
+ - Guard against touching non-pxq processes
14
+
15
+ **Subtasks**:
16
+
17
+ - [x] Create design document (.tmp/design.md)
18
+ - [ ] Create task list (.tmp/task.md)
19
+ - [ ] Implement `get_pxq_server_pid()` function
20
+ - Use `lsof -ti:{port}` to find process listening on server port
21
+ - Verify process identity via cmdline check
22
+ - Return None if no pxq server found or non-pxq process owns port
23
+ - [ ] Implement `is_pxq_server_running()` function
24
+ - Wrapper around `get_pxq_server_pid()`
25
+ - [ ] Implement `cleanup_stale_pid()` function
26
+ - Compare PID file with actual server PID
27
+ - Remove stale PID file
28
+ - [ ] Add unit tests for new functions
29
+ - Test stale PID reconciliation happy path
30
+ - Test non-pxq listener failure guard
31
+ - Test normal operation
32
+ - Test no server running
33
+ - [ ] Run pytest and verify all tests pass
34
+ - [ ] Save pytest output to .sisyphus/evidence/
35
+ - [ ] Create learnings document
36
+
37
+ **Dependencies**: None (foundation task)
38
+
39
+ **Dependent Tasks**:
40
+ - Task 2: TBD
41
+ - Task 3: TBD
42
+ - Task 4: TBD
43
+
44
+ **Files to Modify**:
45
+ - `src/pxq/server_pid.py` - Add new functions
46
+ - `tests/unit/test_server_pid.py` - Add test cases
47
+
48
+ **Test Commands**:
49
+ ```bash
50
+ # Run all server_pid tests
51
+ uv run pytest tests/unit/test_server_pid.py -v
52
+
53
+ # Run specific test categories
54
+ uv run pytest tests/unit -k "server_pid or stale_pid or listener" -q
55
+ ```
56
+
57
+ **Acceptance Criteria**:
58
+ - [ ] `get_pxq_server_pid()` returns actual pxq server PID (not stale PID)
59
+ - [ ] `get_pxq_server_pid()` returns None for non-pxq process on port
60
+ - [ ] `is_pxq_server_running()` accurately reflects pxq server status
61
+ - [ ] `cleanup_stale_pid()` removes stale PID files
62
+ - [ ] All existing tests continue to pass (no regression)
63
+ - [ ] New tests cover all scenarios
64
+ - [ ] pytest output saved to .sisyphus/evidence/
65
+
66
+ ## Notes
67
+
68
+ - Platform: macOS (primary), Linux (secondary)
69
+ - Default server port: 8765
70
+ - Server identity: process running `uvicorn pxq.server:app`