gitarsenal-cli 1.9.71 → 1.9.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.venv_status.json +1 -1
- package/bin/gitarsenal.js +8 -31
- package/kill_claude/prompts/claude-code-system-prompt.md +13 -0
- package/kill_claude/prompts/claude-code-tool-prompts.md +1 -0
- package/kill_claude/tools/__pycache__/task_tool.cpython-313.pyc +0 -0
- package/kill_claude/tools/bash_tool.py +1 -0
- package/lib/sandbox.js +1 -8
- package/package.json +1 -1
- package/python/debug_modal_minimal.py +212 -0
- package/python/test_container.py +108 -17
- package/python/test_modalSandboxScript.py +65 -1097
package/.venv_status.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"created":"2025-08-
|
|
1
|
+
{"created":"2025-08-15T09:00:00.169Z","packages":["modal","gitingest","requests","anthropic"],"uv_version":"uv 0.8.4 (Homebrew 2025-07-30)"}
|
package/bin/gitarsenal.js
CHANGED
|
@@ -747,7 +747,6 @@ async function runContainerCommand(options) {
|
|
|
747
747
|
let gpuCount = parseInt(options.gpuCount) || 1;
|
|
748
748
|
let volumeName = options.volumeName || options.volume;
|
|
749
749
|
let skipConfirmation = options.yes;
|
|
750
|
-
let useApi = !options.manual;
|
|
751
750
|
let setupCommands = options.setupCommands || [];
|
|
752
751
|
|
|
753
752
|
if (!repoUrl) {
|
|
@@ -762,8 +761,8 @@ async function runContainerCommand(options) {
|
|
|
762
761
|
repoUrl = answers.repoUrl;
|
|
763
762
|
}
|
|
764
763
|
|
|
765
|
-
//
|
|
766
|
-
if (
|
|
764
|
+
// Analyze repository for GPU recommendations (repository setup is now handled by Agent)
|
|
765
|
+
if (repoUrl) {
|
|
767
766
|
// Start a main spinner that will show overall progress
|
|
768
767
|
const mainSpinner = ora('Analyzing repository...').start();
|
|
769
768
|
|
|
@@ -783,11 +782,7 @@ async function runContainerCommand(options) {
|
|
|
783
782
|
previewAbort.abort();
|
|
784
783
|
mainSpinner.succeed('Analysis complete!');
|
|
785
784
|
printGpuTorchCudaSummary(fullData);
|
|
786
|
-
|
|
787
|
-
setupCommands = fullData.commands;
|
|
788
|
-
// Disable auto-detection since we already have commands
|
|
789
|
-
useApi = false;
|
|
790
|
-
}
|
|
785
|
+
// Repository setup will be handled by Agent in container
|
|
791
786
|
} else {
|
|
792
787
|
// Full fetch failed, wait for preview and show its results
|
|
793
788
|
mainSpinner.text = 'Waiting for preview analysis to complete...';
|
|
@@ -798,13 +793,13 @@ async function runContainerCommand(options) {
|
|
|
798
793
|
} else {
|
|
799
794
|
mainSpinner.fail('Analysis failed - both preview and full analysis timed out or failed');
|
|
800
795
|
console.log(chalk.yellow('⚠️ Unable to analyze repository automatically.'));
|
|
801
|
-
console.log(chalk.gray('
|
|
796
|
+
console.log(chalk.gray('Repository setup will still be handled by Agent in container.'));
|
|
802
797
|
}
|
|
803
798
|
}
|
|
804
799
|
} catch (error) {
|
|
805
800
|
mainSpinner.fail(`Analysis failed: ${error.message}`);
|
|
806
801
|
console.log(chalk.yellow('⚠️ Unable to analyze repository automatically.'));
|
|
807
|
-
console.log(chalk.gray('
|
|
802
|
+
console.log(chalk.gray('Repository setup will still be handled by Agent in container.'));
|
|
808
803
|
}
|
|
809
804
|
}
|
|
810
805
|
|
|
@@ -879,31 +874,14 @@ async function runContainerCommand(options) {
|
|
|
879
874
|
volumeName = getDefaultVolumeName(repoUrl);
|
|
880
875
|
}
|
|
881
876
|
|
|
882
|
-
//
|
|
883
|
-
if (!
|
|
884
|
-
const apiAnswers = await inquirer.prompt([
|
|
885
|
-
{
|
|
886
|
-
type: 'confirm',
|
|
887
|
-
name: 'useApi',
|
|
888
|
-
message: 'Automatically detect setup commands for this repository?',
|
|
889
|
-
default: true
|
|
890
|
-
}
|
|
891
|
-
]);
|
|
892
|
-
|
|
893
|
-
useApi = apiAnswers.useApi;
|
|
894
|
-
} else if (options.yes) {
|
|
895
|
-
// If --yes flag is used, default to using API for setup command detection
|
|
896
|
-
useApi = true;
|
|
897
|
-
}
|
|
898
|
-
|
|
899
|
-
// Only prompt for custom commands if auto-detection is disabled and no commands provided
|
|
900
|
-
if (!useApi && setupCommands.length === 0) {
|
|
877
|
+
// Prompt for custom setup commands only if no repo URL provided and no commands specified
|
|
878
|
+
if (!repoUrl && !options.manual && setupCommands.length === 0) {
|
|
901
879
|
const setupAnswers = await inquirer.prompt([
|
|
902
880
|
{
|
|
903
881
|
type: 'confirm',
|
|
904
882
|
name: 'useCustomCommands',
|
|
905
883
|
message: 'Provide custom setup commands?',
|
|
906
|
-
default:
|
|
884
|
+
default: false
|
|
907
885
|
}
|
|
908
886
|
]);
|
|
909
887
|
|
|
@@ -953,7 +931,6 @@ async function runContainerCommand(options) {
|
|
|
953
931
|
gpuCount,
|
|
954
932
|
volumeName,
|
|
955
933
|
setupCommands,
|
|
956
|
-
useApi,
|
|
957
934
|
yes: skipConfirmation,
|
|
958
935
|
userId,
|
|
959
936
|
userName,
|
|
@@ -74,6 +74,19 @@ When making changes to files, first understand the file's code conventions. Mimi
|
|
|
74
74
|
- When you edit a piece of code, first look at the code's surrounding context (especially its imports) to understand the code's choice of frameworks and libraries. Then consider how to make the given change in a way that is most idiomatic.
|
|
75
75
|
- Always follow security best practices. Never introduce code that exposes or logs secrets and keys. Never commit secrets or keys to the repository.
|
|
76
76
|
|
|
77
|
+
### Python Package Management
|
|
78
|
+
|
|
79
|
+
- **ALWAYS use `uv pip` instead of `pip` for Python package installations**
|
|
80
|
+
- Use `uv` commands whenever possible for Python development tasks:
|
|
81
|
+
- `uv pip install` instead of `pip install`
|
|
82
|
+
- `uv pip freeze` instead of `pip freeze`
|
|
83
|
+
- `uv pip list` instead of `pip list`
|
|
84
|
+
- `uv run` for running Python scripts with dependency management
|
|
85
|
+
- `uv sync` for synchronizing dependencies
|
|
86
|
+
- `uv add` for adding new dependencies to projects
|
|
87
|
+
- When creating new Python projects, prefer `uv init` over other initialization methods
|
|
88
|
+
- Use `uv venv` for virtual environment creation when needed
|
|
89
|
+
|
|
77
90
|
## Code Style
|
|
78
91
|
|
|
79
92
|
- **IMPORTANT**: DO NOT ADD ***ANY*** COMMENTS unless asked
|
|
@@ -55,6 +55,7 @@ Usage notes:
|
|
|
55
55
|
- You can use the `run_in_background` parameter to run the command in the background, which allows you to continue working while the command runs. You can monitor the output using the Bash tool as it becomes available. Never use `run_in_background` to run 'sleep' as it will return immediately. You do not need to use '&' at the end of the command when using this parameter.
|
|
56
56
|
- VERY IMPORTANT: You MUST avoid using search commands like `find` and `grep`. Instead use Grep, Glob, or Task to search. You MUST avoid read tools like `cat`, `head`, `tail`, and `ls`, and use Read and LS to read files.
|
|
57
57
|
- If you _still_ need to run `grep`, STOP. ALWAYS USE ripgrep at `rg` first, which all Claude Code users have pre-installed.
|
|
58
|
+
- IMPORTANT: Always use `uv pip` instead of just `pip` when running pip commands. For example, use `uv pip install package_name` instead of `pip install package_name`.
|
|
58
59
|
- When issuing multiple commands, use the ';' or '&&' operator to separate them. DO NOT use newlines (newlines are ok in quoted strings).
|
|
59
60
|
- Try to maintain your current working directory throughout the session by using absolute paths and avoiding usage of `cd`. You may use `cd` if the User explicitly requests it.
|
|
60
61
|
|
|
Binary file
|
|
@@ -28,6 +28,7 @@ Usage notes:
|
|
|
28
28
|
- You can use the `run_in_background` parameter to run the command in the background, which allows you to continue working while the command runs. You can monitor the output using the Bash tool as it becomes available. Never use `run_in_background` to run 'sleep' as it will return immediately. You do not need to use '&' at the end of the command when using this parameter.
|
|
29
29
|
- VERY IMPORTANT: You MUST avoid using search commands like `find` and `grep`. Instead use Grep, Glob, or Task to search. You MUST avoid read tools like `cat`, `head`, `tail`, and `ls`, and use Read and LS to read files.
|
|
30
30
|
- If you _still_ need to run `grep`, STOP. ALWAYS USE ripgrep at `rg` first, which all Claude Code users have pre-installed.
|
|
31
|
+
- IMPORTANT: Always use `uv pip` instead of just `pip` when running pip commands. For example, use `uv pip install package_name` instead of `pip install package_name`.
|
|
31
32
|
- When issuing multiple commands, use the ';' or '&&' operator to separate them. DO NOT use newlines (newlines are ok in quoted strings).
|
|
32
33
|
- Try to maintain your current working directory throughout the session by using absolute paths and avoiding usage of `cd`. You may use `cd` if the User explicitly requests it.
|
|
33
34
|
"""
|
package/lib/sandbox.js
CHANGED
|
@@ -33,7 +33,6 @@ function getPythonScriptPath() {
|
|
|
33
33
|
* @param {number} options.gpuCount - Number of GPUs (default: 1)
|
|
34
34
|
* @param {string} options.volumeName - Volume name
|
|
35
35
|
* @param {Array<string>} options.setupCommands - Setup commands
|
|
36
|
-
* @param {boolean} options.useApi - Whether to use the API to fetch setup commands
|
|
37
36
|
* @param {boolean} options.showExamples - Whether to show usage examples
|
|
38
37
|
* @returns {Promise<void>}
|
|
39
38
|
*/
|
|
@@ -44,7 +43,6 @@ async function runContainer(options) {
|
|
|
44
43
|
gpuCount = 1,
|
|
45
44
|
volumeName,
|
|
46
45
|
setupCommands = [],
|
|
47
|
-
useApi = true,
|
|
48
46
|
showExamples = false,
|
|
49
47
|
yes = false,
|
|
50
48
|
userId,
|
|
@@ -103,10 +101,7 @@ async function runContainer(options) {
|
|
|
103
101
|
args.push('--volume-name', volumeName);
|
|
104
102
|
}
|
|
105
103
|
|
|
106
|
-
//
|
|
107
|
-
if (useApi) {
|
|
108
|
-
args.push('--use-api');
|
|
109
|
-
}
|
|
104
|
+
// Repository setup is now handled by Agent when --repo-url is provided
|
|
110
105
|
|
|
111
106
|
// Add --yes flag to skip confirmation prompts
|
|
112
107
|
if (yes) {
|
|
@@ -128,8 +123,6 @@ async function runContainer(options) {
|
|
|
128
123
|
const tempCommandsFile = path.join(os.tmpdir(), `gitarsenal-commands-${Date.now()}.txt`);
|
|
129
124
|
fs.writeFileSync(tempCommandsFile, setupCommands.join('\n'));
|
|
130
125
|
args.push('--commands-file', tempCommandsFile);
|
|
131
|
-
// Ensure Python skips auto-detection via GitIngest when commands are provided
|
|
132
|
-
args.push('--no-gitingest');
|
|
133
126
|
}
|
|
134
127
|
|
|
135
128
|
// Log the command being executed
|
package/package.json
CHANGED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Minimal Modal container test to debug segmentation fault issues.
|
|
4
|
+
This script tests different base images and configurations to isolate the problem.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import modal
|
|
10
|
+
import datetime
|
|
11
|
+
|
|
12
|
+
# Set up Modal tokens
|
|
13
|
+
from fetch_modal_tokens import get_tokens
|
|
14
|
+
token_id, token_secret, openai_api_key, anthropic_api_key, openrouter_api_key, groq_api_key = get_tokens()
|
|
15
|
+
|
|
16
|
+
if token_id is None or token_secret is None:
|
|
17
|
+
raise ValueError("Could not get valid tokens")
|
|
18
|
+
|
|
19
|
+
os.environ["MODAL_TOKEN_ID"] = token_id
|
|
20
|
+
os.environ["MODAL_TOKEN_SECRET"] = token_secret
|
|
21
|
+
|
|
22
|
+
def test_minimal_container():
|
|
23
|
+
"""Test 1: Minimal container with debian base"""
|
|
24
|
+
print("🔬 Testing minimal Debian container...")
|
|
25
|
+
|
|
26
|
+
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
|
27
|
+
app_name = f"debug-minimal-{timestamp}"
|
|
28
|
+
|
|
29
|
+
# Minimal debian image
|
|
30
|
+
minimal_image = (
|
|
31
|
+
modal.Image.debian_slim()
|
|
32
|
+
.apt_install("curl", "wget")
|
|
33
|
+
.pip_install("requests")
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
app = modal.App(app_name, image=minimal_image)
|
|
37
|
+
|
|
38
|
+
@app.function(timeout=300, serialized=True)
|
|
39
|
+
def minimal_test():
|
|
40
|
+
print("✅ Minimal container started successfully!")
|
|
41
|
+
import sys
|
|
42
|
+
print(f"Python version: {sys.version}")
|
|
43
|
+
import platform
|
|
44
|
+
print(f"Platform: {platform.platform()}")
|
|
45
|
+
return "success"
|
|
46
|
+
|
|
47
|
+
try:
|
|
48
|
+
with app.run():
|
|
49
|
+
result = minimal_test.remote()
|
|
50
|
+
print(f"✅ Minimal test result: {result}")
|
|
51
|
+
return True
|
|
52
|
+
except Exception as e:
|
|
53
|
+
print(f"❌ Minimal test failed: {e}")
|
|
54
|
+
return False
|
|
55
|
+
|
|
56
|
+
def test_cuda_base():
|
|
57
|
+
"""Test 2: CUDA base without additional packages"""
|
|
58
|
+
print("🔬 Testing CUDA base image...")
|
|
59
|
+
|
|
60
|
+
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
|
61
|
+
app_name = f"debug-cuda-{timestamp}"
|
|
62
|
+
|
|
63
|
+
# CUDA base image without extras
|
|
64
|
+
cuda_image = modal.Image.from_registry("nvidia/cuda:12.4.0-runtime-ubuntu22.04", add_python="3.11")
|
|
65
|
+
|
|
66
|
+
app = modal.App(app_name, image=cuda_image)
|
|
67
|
+
|
|
68
|
+
@app.function(timeout=300, gpu="t4", serialized=True)
|
|
69
|
+
def cuda_test():
|
|
70
|
+
print("✅ CUDA container started successfully!")
|
|
71
|
+
import sys
|
|
72
|
+
print(f"Python version: {sys.version}")
|
|
73
|
+
|
|
74
|
+
# Test CUDA availability
|
|
75
|
+
try:
|
|
76
|
+
import subprocess
|
|
77
|
+
result = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
|
|
78
|
+
print(f"NVIDIA-SMI output: {result.stdout[:200]}...")
|
|
79
|
+
except Exception as e:
|
|
80
|
+
print(f"NVIDIA-SMI error: {e}")
|
|
81
|
+
|
|
82
|
+
return "cuda_success"
|
|
83
|
+
|
|
84
|
+
try:
|
|
85
|
+
with app.run():
|
|
86
|
+
result = cuda_test.remote()
|
|
87
|
+
print(f"✅ CUDA test result: {result}")
|
|
88
|
+
return True
|
|
89
|
+
except Exception as e:
|
|
90
|
+
print(f"❌ CUDA test failed: {e}")
|
|
91
|
+
return False
|
|
92
|
+
|
|
93
|
+
def test_cuda_devel():
|
|
94
|
+
"""Test 3: CUDA devel image (current failing one)"""
|
|
95
|
+
print("🔬 Testing CUDA devel image...")
|
|
96
|
+
|
|
97
|
+
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
|
98
|
+
app_name = f"debug-cuda-devel-{timestamp}"
|
|
99
|
+
|
|
100
|
+
# CUDA devel image (the one that's failing)
|
|
101
|
+
cuda_devel_image = modal.Image.from_registry("nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11")
|
|
102
|
+
|
|
103
|
+
app = modal.App(app_name, image=cuda_devel_image)
|
|
104
|
+
|
|
105
|
+
@app.function(timeout=300, gpu="t4", serialized=True)
|
|
106
|
+
def cuda_devel_test():
|
|
107
|
+
print("✅ CUDA devel container started successfully!")
|
|
108
|
+
import sys
|
|
109
|
+
print(f"Python version: {sys.version}")
|
|
110
|
+
return "cuda_devel_success"
|
|
111
|
+
|
|
112
|
+
try:
|
|
113
|
+
with app.run():
|
|
114
|
+
result = cuda_devel_test.remote()
|
|
115
|
+
print(f"✅ CUDA devel test result: {result}")
|
|
116
|
+
return True
|
|
117
|
+
except Exception as e:
|
|
118
|
+
print(f"❌ CUDA devel test failed: {e}")
|
|
119
|
+
return False
|
|
120
|
+
|
|
121
|
+
def test_with_packages():
|
|
122
|
+
"""Test 4: Add packages incrementally"""
|
|
123
|
+
print("🔬 Testing with SSH packages...")
|
|
124
|
+
|
|
125
|
+
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
|
126
|
+
app_name = f"debug-ssh-{timestamp}"
|
|
127
|
+
|
|
128
|
+
# Add SSH packages to working base
|
|
129
|
+
ssh_image = (
|
|
130
|
+
modal.Image.debian_slim()
|
|
131
|
+
.apt_install("openssh-server", "sudo", "curl")
|
|
132
|
+
.pip_install("requests")
|
|
133
|
+
.run_commands(
|
|
134
|
+
"mkdir -p /var/run/sshd",
|
|
135
|
+
"ssh-keygen -A"
|
|
136
|
+
)
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
app = modal.App(app_name, image=ssh_image)
|
|
140
|
+
|
|
141
|
+
@app.function(timeout=300, serialized=True)
|
|
142
|
+
def ssh_test():
|
|
143
|
+
print("✅ SSH container started successfully!")
|
|
144
|
+
import subprocess
|
|
145
|
+
result = subprocess.run(["service", "ssh", "status"], capture_output=True, text=True)
|
|
146
|
+
print(f"SSH service status: {result.returncode}")
|
|
147
|
+
return "ssh_success"
|
|
148
|
+
|
|
149
|
+
try:
|
|
150
|
+
with app.run():
|
|
151
|
+
result = ssh_test.remote()
|
|
152
|
+
print(f"✅ SSH test result: {result}")
|
|
153
|
+
return True
|
|
154
|
+
except Exception as e:
|
|
155
|
+
print(f"❌ SSH test failed: {e}")
|
|
156
|
+
return False
|
|
157
|
+
|
|
158
|
+
def main():
|
|
159
|
+
"""Run all tests to isolate the segfault issue"""
|
|
160
|
+
print("🐛 DEBUGGING MODAL SEGMENTATION FAULT")
|
|
161
|
+
print("=" * 50)
|
|
162
|
+
|
|
163
|
+
tests = [
|
|
164
|
+
("Minimal Debian", test_minimal_container),
|
|
165
|
+
("CUDA Runtime", test_cuda_base),
|
|
166
|
+
("CUDA Devel", test_cuda_devel),
|
|
167
|
+
("SSH Packages", test_with_packages),
|
|
168
|
+
]
|
|
169
|
+
|
|
170
|
+
results = {}
|
|
171
|
+
|
|
172
|
+
for test_name, test_func in tests:
|
|
173
|
+
print(f"\n{'=' * 20} {test_name} {'=' * 20}")
|
|
174
|
+
try:
|
|
175
|
+
results[test_name] = test_func()
|
|
176
|
+
except Exception as e:
|
|
177
|
+
print(f"❌ {test_name} failed with exception: {e}")
|
|
178
|
+
results[test_name] = False
|
|
179
|
+
|
|
180
|
+
print(f"\n{'=' * 50}")
|
|
181
|
+
print("🔍 TEST RESULTS SUMMARY:")
|
|
182
|
+
print("=" * 50)
|
|
183
|
+
|
|
184
|
+
for test_name, success in results.items():
|
|
185
|
+
status = "✅ PASS" if success else "❌ FAIL"
|
|
186
|
+
print(f"{test_name:<20} {status}")
|
|
187
|
+
|
|
188
|
+
# Analysis
|
|
189
|
+
print(f"\n{'=' * 50}")
|
|
190
|
+
print("📊 ANALYSIS:")
|
|
191
|
+
print("=" * 50)
|
|
192
|
+
|
|
193
|
+
if results.get("Minimal Debian", False):
|
|
194
|
+
print("✅ Basic Modal functionality works")
|
|
195
|
+
else:
|
|
196
|
+
print("❌ Basic Modal functionality broken - check Modal setup")
|
|
197
|
+
return
|
|
198
|
+
|
|
199
|
+
if results.get("CUDA Runtime", False):
|
|
200
|
+
print("✅ CUDA runtime works")
|
|
201
|
+
else:
|
|
202
|
+
print("❌ CUDA runtime broken - GPU/CUDA issue")
|
|
203
|
+
|
|
204
|
+
if not results.get("CUDA Devel", False):
|
|
205
|
+
print("❌ CUDA devel broken - likely the source of segfault")
|
|
206
|
+
print("💡 RECOMMENDATION: Use CUDA runtime instead of devel")
|
|
207
|
+
|
|
208
|
+
if results.get("SSH Packages", False):
|
|
209
|
+
print("✅ SSH packages work on debian")
|
|
210
|
+
|
|
211
|
+
if __name__ == "__main__":
|
|
212
|
+
main()
|
package/python/test_container.py
CHANGED
|
@@ -3,15 +3,20 @@ import time
|
|
|
3
3
|
import secrets
|
|
4
4
|
import string
|
|
5
5
|
import modal
|
|
6
|
+
import sys
|
|
6
7
|
|
|
7
8
|
def generate_random_password(length=16):
|
|
8
9
|
"""Generate a random password for SSH access"""
|
|
10
|
+
print(f"[DEBUG] Generating random password of length {length}")
|
|
9
11
|
alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
|
|
10
12
|
password = ''.join(secrets.choice(alphabet) for i in range(length))
|
|
13
|
+
print(f"[DEBUG] Password generated successfully (length: {len(password)})")
|
|
11
14
|
return password
|
|
12
15
|
|
|
16
|
+
print("[DEBUG] Starting container setup...")
|
|
17
|
+
|
|
13
18
|
image = (
|
|
14
|
-
modal.Image.from_registry("nvidia/cuda:12.
|
|
19
|
+
modal.Image.from_registry("nvidia/cuda:12.2.0-devel-ubuntu22.04", add_python="3.11")
|
|
15
20
|
.apt_install("openssh-server", "sudo", "curl", "wget", "git", "vim", "htop", "tmux", "nvtop")
|
|
16
21
|
.pip_install("cupy-cuda12x", "torch", "transformers")
|
|
17
22
|
.run_commands(
|
|
@@ -23,32 +28,118 @@ image = (
|
|
|
23
28
|
)
|
|
24
29
|
)
|
|
25
30
|
|
|
31
|
+
print("[DEBUG] Image configuration completed")
|
|
32
|
+
|
|
26
33
|
app = modal.App("cuda-ssh-container", image=image)
|
|
34
|
+
print("[DEBUG] Modal app created")
|
|
27
35
|
|
|
28
36
|
@app.function(gpu="A10G", timeout=3600)
|
|
29
37
|
def start_ssh():
|
|
38
|
+
print("[DEBUG] Starting SSH function...")
|
|
39
|
+
|
|
30
40
|
# Generate SSH password
|
|
31
|
-
|
|
32
|
-
|
|
41
|
+
print("[DEBUG] Step 1: Generating SSH password")
|
|
42
|
+
try:
|
|
43
|
+
password = generate_random_password()
|
|
44
|
+
print(f"[DEBUG] Password generation successful")
|
|
45
|
+
except Exception as e:
|
|
46
|
+
print(f"[ERROR] Failed to generate password: {e}")
|
|
47
|
+
return
|
|
48
|
+
|
|
49
|
+
# Set root password
|
|
50
|
+
print("[DEBUG] Step 2: Setting root password")
|
|
51
|
+
try:
|
|
52
|
+
result = subprocess.run(["bash", "-c", f"echo 'root:{password}' | chpasswd"],
|
|
53
|
+
check=True, capture_output=True, text=True)
|
|
54
|
+
print(f"[DEBUG] Password set successfully. Return code: {result.returncode}")
|
|
55
|
+
except subprocess.CalledProcessError as e:
|
|
56
|
+
print(f"[ERROR] Failed to set password: {e}")
|
|
57
|
+
print(f"[ERROR] stdout: {e.stdout}")
|
|
58
|
+
print(f"[ERROR] stderr: {e.stderr}")
|
|
59
|
+
return
|
|
60
|
+
except Exception as e:
|
|
61
|
+
print(f"[ERROR] Unexpected error setting password: {e}")
|
|
62
|
+
return
|
|
33
63
|
|
|
34
64
|
# Start SSH server
|
|
35
|
-
|
|
36
|
-
|
|
65
|
+
print("[DEBUG] Step 3: Starting SSH server")
|
|
66
|
+
try:
|
|
67
|
+
ssh_process = subprocess.Popen(["/usr/sbin/sshd", "-D"])
|
|
68
|
+
print(f"[DEBUG] SSH server started with PID: {ssh_process.pid}")
|
|
69
|
+
time.sleep(2)
|
|
70
|
+
print("[DEBUG] Waited 2 seconds for SSH server to initialize")
|
|
71
|
+
except Exception as e:
|
|
72
|
+
print(f"[ERROR] Failed to start SSH server: {e}")
|
|
73
|
+
return
|
|
37
74
|
|
|
38
75
|
# Test CUDA
|
|
39
|
-
|
|
40
|
-
|
|
76
|
+
print("[DEBUG] Step 4: Testing CUDA availability")
|
|
77
|
+
try:
|
|
78
|
+
print("[DEBUG] Running nvidia-smi...")
|
|
79
|
+
nvidia_result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=30)
|
|
80
|
+
print(f"[DEBUG] nvidia-smi return code: {nvidia_result.returncode}")
|
|
81
|
+
if nvidia_result.stdout:
|
|
82
|
+
print(f"[DEBUG] nvidia-smi output:\n{nvidia_result.stdout}")
|
|
83
|
+
if nvidia_result.stderr:
|
|
84
|
+
print(f"[DEBUG] nvidia-smi stderr:\n{nvidia_result.stderr}")
|
|
85
|
+
except subprocess.TimeoutExpired:
|
|
86
|
+
print("[ERROR] nvidia-smi timed out")
|
|
87
|
+
except Exception as e:
|
|
88
|
+
print(f"[ERROR] nvidia-smi failed: {e}")
|
|
89
|
+
|
|
90
|
+
try:
|
|
91
|
+
print("[DEBUG] Running nvcc --version...")
|
|
92
|
+
nvcc_result = subprocess.run(["nvcc", "--version"], capture_output=True, text=True, timeout=30)
|
|
93
|
+
print(f"[DEBUG] nvcc return code: {nvcc_result.returncode}")
|
|
94
|
+
if nvcc_result.stdout:
|
|
95
|
+
print(f"[DEBUG] nvcc output:\n{nvcc_result.stdout}")
|
|
96
|
+
if nvcc_result.stderr:
|
|
97
|
+
print(f"[DEBUG] nvcc stderr:\n{nvcc_result.stderr}")
|
|
98
|
+
except subprocess.TimeoutExpired:
|
|
99
|
+
print("[ERROR] nvcc timed out")
|
|
100
|
+
except Exception as e:
|
|
101
|
+
print(f"[ERROR] nvcc failed: {e}")
|
|
41
102
|
|
|
42
103
|
# Forward SSH port
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
104
|
+
print("[DEBUG] Step 5: Setting up SSH port forwarding")
|
|
105
|
+
try:
|
|
106
|
+
with modal.forward(port=22, unencrypted=True) as tunnel:
|
|
107
|
+
hostname, port = tunnel.tcp_socket
|
|
108
|
+
print(f"[DEBUG] Tunnel established successfully")
|
|
109
|
+
print(f"[DEBUG] Hostname: {hostname}, Port: {port}")
|
|
110
|
+
print(f"\n" + "="*50)
|
|
111
|
+
print(f"SSH CONNECTION INFO:")
|
|
112
|
+
print(f"SSH: ssh -p {port} root@{hostname}")
|
|
113
|
+
print(f"Password: {password}")
|
|
114
|
+
print(f"="*50 + "\n")
|
|
115
|
+
|
|
116
|
+
# Keep alive with periodic status updates
|
|
117
|
+
print("[DEBUG] Starting keep-alive loop...")
|
|
118
|
+
counter = 0
|
|
119
|
+
while True:
|
|
120
|
+
counter += 1
|
|
121
|
+
print(f"[DEBUG] Keep-alive cycle {counter} - Container still running")
|
|
122
|
+
time.sleep(60)
|
|
123
|
+
|
|
124
|
+
except Exception as e:
|
|
125
|
+
print(f"[ERROR] Failed to set up port forwarding: {e}")
|
|
126
|
+
import traceback
|
|
127
|
+
traceback.print_exc()
|
|
128
|
+
return
|
|
51
129
|
|
|
52
130
|
if __name__ == "__main__":
|
|
53
|
-
|
|
54
|
-
|
|
131
|
+
print("[DEBUG] Script starting from main...")
|
|
132
|
+
print(f"[DEBUG] Python version: {sys.version}")
|
|
133
|
+
print(f"[DEBUG] Modal version: {modal.__version__}")
|
|
134
|
+
|
|
135
|
+
try:
|
|
136
|
+
print("[DEBUG] Starting modal app...")
|
|
137
|
+
with app.run():
|
|
138
|
+
print("[DEBUG] Modal app context established")
|
|
139
|
+
start_ssh.remote()
|
|
140
|
+
except Exception as e:
|
|
141
|
+
print(f"[ERROR] Failed to run modal app: {e}")
|
|
142
|
+
import traceback
|
|
143
|
+
traceback.print_exc()
|
|
144
|
+
|
|
145
|
+
print("[DEBUG] Script completed")
|