flowmesh-cli-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowmesh_cli_stack/__init__.py +13 -0
- flowmesh_cli_stack/assets/.env.example +204 -0
- flowmesh_cli_stack/assets/compose.yml +201 -0
- flowmesh_cli_stack/assets/docker-bake.hcl +110 -0
- flowmesh_cli_stack/bundle.py +384 -0
- flowmesh_cli_stack/env_schema.py +646 -0
- flowmesh_cli_stack/stack.py +789 -0
- flowmesh_cli_stack/utils.py +137 -0
- flowmesh_cli_stack/worker.py +235 -0
- flowmesh_cli_stack-0.1.0.dist-info/METADATA +25 -0
- flowmesh_cli_stack-0.1.0.dist-info/RECORD +14 -0
- flowmesh_cli_stack-0.1.0.dist-info/WHEEL +5 -0
- flowmesh_cli_stack-0.1.0.dist-info/licenses/LICENSE +202 -0
- flowmesh_cli_stack-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Stack management package for FlowMesh CLI."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from .bundle import app as bundle_app
|
|
6
|
+
from .stack import app as stack_app
|
|
7
|
+
from .worker import app as worker_app
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def register(root: typer.Typer) -> None:
    """Attach the ``stack`` command group and its sub-groups to *root*.

    ``stack_app`` is mounted on *root* under the name ``stack``; the
    ``worker`` and ``bundle`` apps are then mounted beneath ``stack_app``.

    Note that the nested groups are added to the module-level ``stack_app``
    object on every call, so this is presumably meant to be invoked once
    per process.
    """
    root.add_typer(stack_app, name="stack")

    # Nested groups, in the order they are registered under `stack`.
    nested_groups = (
        ("worker", worker_app),
        ("bundle", bundle_app),
    )
    for group_name, sub_app in nested_groups:
        stack_app.add_typer(sub_app, name=group_name)
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# FlowMesh Stack Configuration
|
|
2
|
+
# Copy to .env and adjust as needed
|
|
3
|
+
|
|
4
|
+
# ==== Image Source ====
|
|
5
|
+
FLOWMESH_REGISTRY=ghcr.io/mlsys-io
|
|
6
|
+
FLOWMESH_VERSION=dev
|
|
7
|
+
# Optional registry cache lineage for stack push.
|
|
8
|
+
# Leave empty to use the default stable cache scope.
|
|
9
|
+
FLOWMESH_CACHE_VERSION=
|
|
10
|
+
FLOWMESH_BUILD_REF=local
|
|
11
|
+
|
|
12
|
+
# ==== Node Identity ====
|
|
13
|
+
# Optional suffix appended to stack-managed Docker object names.
|
|
14
|
+
# Use a distinct suffix per local stack on shared hosts.
|
|
15
|
+
# This isolates container, network, and volume names,
|
|
16
|
+
# but each stack still needs unique ports.
|
|
17
|
+
FLOWMESH_STACK_SUFFIX=
|
|
18
|
+
NODE_ROLE=root
|
|
19
|
+
NODE_NAMESPACE=flowmesh
|
|
20
|
+
NODE_CLUSTER=dev
|
|
21
|
+
NODE_ALIAS=node
|
|
22
|
+
NODE_TAGS=
|
|
23
|
+
ENABLE_SUPERVISOR=true
|
|
24
|
+
SERVER_HOST=localhost
|
|
25
|
+
SERVER_HTTP_PORT=8000
|
|
26
|
+
SERVER_GRPC_PORT=50051
|
|
27
|
+
SERVER_LOG_LEVEL=INFO
|
|
28
|
+
|
|
29
|
+
# ==== Server gRPC TLS ====
|
|
30
|
+
# Leave empty to disable
|
|
31
|
+
SERVER_TLS_DIR=./secrets/tls/server
|
|
32
|
+
SERVER_GRPC_TLS_CA_FILE=/etc/ssl/server/server-ca.pem
|
|
33
|
+
SERVER_GRPC_TLS_CERT_FILE=/etc/ssl/server/server.pem
|
|
34
|
+
SERVER_GRPC_TLS_KEY_FILE=/etc/ssl/server/server.key
|
|
35
|
+
|
|
36
|
+
# ==== Supervisor gRPC ====
|
|
37
|
+
# Tuning for the supervisor's gRPC server and worker connections.
|
|
38
|
+
# Leave SUPERVISOR_GRPC_EXTERNAL_PORT empty unless workers connect
|
|
39
|
+
# through a port-forwarded / proxied address.
|
|
40
|
+
SUPERVISOR_GRPC_DISABLE_SERVER_TLS=false
|
|
41
|
+
SUPERVISOR_GRPC_EXTERNAL_PORT=
|
|
42
|
+
SUPERVISOR_GRPC_KEEPALIVE_PERMIT_WITHOUT_CALLS=true
|
|
43
|
+
SUPERVISOR_GRPC_MIN_RECV_PING_INTERVAL_MS=60000
|
|
44
|
+
SUPERVISOR_GRPC_KEEPALIVE_TIME_MS=300000
|
|
45
|
+
SUPERVISOR_GRPC_KEEPALIVE_TIMEOUT_MS=10000
|
|
46
|
+
|
|
47
|
+
# ==== Redis Connectivity ====
|
|
48
|
+
REDIS_CONTROL_URL=redis://localhost:6379/0
|
|
49
|
+
REDIS_TELEMETRY_URL=redis://localhost:6380/0
|
|
50
|
+
|
|
51
|
+
# ==== Core Ports ====
|
|
52
|
+
REDIS_CONTROL_PORT=6379
|
|
53
|
+
REDIS_TELEMETRY_PORT=6380
|
|
54
|
+
|
|
55
|
+
# ==== Log Streams (Redis) ====
|
|
56
|
+
# Caps Redis Streams for per-task and per-workflow logs.
|
|
57
|
+
LOG_STREAM_MAXLEN_TASK=50000
|
|
58
|
+
LOG_STREAM_MAXLEN_WORKFLOW=200000
|
|
59
|
+
# Expire log stream keys after close (0 disables).
|
|
60
|
+
LOG_STREAM_TTL_SEC=3600
|
|
61
|
+
# Flush archived task logs at most every N seconds.
|
|
62
|
+
TASK_LOG_ARCHIVE_FLUSH_INTERVAL_SEC=5
|
|
63
|
+
# Flush archived task logs after buffering N entries.
|
|
64
|
+
TASK_LOG_ARCHIVE_FLUSH_MAX_ENTRIES=100
|
|
65
|
+
|
|
66
|
+
# ==== Redis Access ====
|
|
67
|
+
REDIS_ACL_ENABLED=1
|
|
68
|
+
REDIS_USERNAME=admin
|
|
69
|
+
REDIS_PASSWORD=very-strong-password
|
|
70
|
+
|
|
71
|
+
# ==== Redis TLS ====
|
|
72
|
+
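# Leave REDIS_TLS_CA_FILE, REDIS_TLS_CERT_FILE, and REDIS_TLS_KEY_FILE all empty to disable Redis TLS; setting any one of them requires all three.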
|
|
73
|
+
REDIS_TLS_DIR=./secrets/tls/redis
|
|
74
|
+
REDIS_TLS_CA_FILE=/etc/ssl/redis/redis-ca.pem
|
|
75
|
+
REDIS_TLS_CERT_FILE=/etc/ssl/redis/redis-server.pem
|
|
76
|
+
REDIS_TLS_KEY_FILE=/etc/ssl/redis/redis-server.key
|
|
77
|
+
|
|
78
|
+
# ==== SSH Task Support ====
|
|
79
|
+
ENABLE_SERVER_SSH_PROXY=true
|
|
80
|
+
ENABLE_SERVER_SSH_FORWARD=true
|
|
81
|
+
ENABLE_SERVER_SSH_CONNECTION_AUDIT=true
|
|
82
|
+
SERVER_SSH_FORWARD_BIND_HOST=0.0.0.0
|
|
83
|
+
SERVER_SSH_FORWARD_PUBLIC_HOST=localhost
|
|
84
|
+
SERVER_SSH_FORWARD_PORT_START=32000
|
|
85
|
+
SERVER_SSH_FORWARD_PORT_END=32100
|
|
86
|
+
|
|
87
|
+
# ==== SSH Worker Defaults ====
|
|
88
|
+
ENABLE_SSH_BY_DEFAULT=true
|
|
89
|
+
SSH_DEFAULT_IMAGE=
|
|
90
|
+
SSH_DEFAULT_USER=
|
|
91
|
+
SSH_DEFAULT_TTL_SEC=
|
|
92
|
+
SSH_DEFAULT_IDLE_SEC=
|
|
93
|
+
SSH_MAX_TTL_SEC=
|
|
94
|
+
SSH_POLL_INTERVAL_SEC=
|
|
95
|
+
SSH_STOP_TIMEOUT_SEC=
|
|
96
|
+
|
|
97
|
+
# ==== General Settings ====
|
|
98
|
+
TZ=Asia/Singapore
|
|
99
|
+
LOG_LEVEL=INFO
|
|
100
|
+
|
|
101
|
+
# ==== Orchestrator Settings ====
|
|
102
|
+
ORCHESTRATOR_DISPATCH_MODE=adaptive
|
|
103
|
+
ORCHESTRATOR_WORKER_SELECTION=best_fit
|
|
104
|
+
SCHEDULER_SELECTION_JITTER=0.001
|
|
105
|
+
SCHEDULER_LAMBDA_INFERENCE=0.4
|
|
106
|
+
SCHEDULER_LAMBDA_TRAINING=0.8
|
|
107
|
+
SCHEDULER_LAMBDA_OTHER=0.5
|
|
108
|
+
ENABLE_TASK_MERGE=true
|
|
109
|
+
TASK_MERGE_MAX_BATCH_SIZE=4
|
|
110
|
+
ENABLE_CONTEXT_REUSE=true
|
|
111
|
+
WORKER_CACHE_TTL_SEC=3600
|
|
112
|
+
ENABLE_STAGE_WEIGHT_STICKINESS=false
|
|
113
|
+
ENABLE_WORKER_WATCHDOG=true
|
|
114
|
+
WORKER_DEATH_CHECK_INTERVAL=30
|
|
115
|
+
WORKER_DEATH_GRACE_SEC=60
|
|
116
|
+
|
|
117
|
+
# ==== Server Heartbeat ====
|
|
118
|
+
SERVER_HEARTBEAT_INTERVAL=30
|
|
119
|
+
|
|
120
|
+
# ==== Vast.ai Configuration ====
|
|
121
|
+
VAST_SEARCH_LIMIT=
|
|
122
|
+
VAST_MAX_RETRIES=
|
|
123
|
+
|
|
124
|
+
# ==== Worker Parameters ====
|
|
125
|
+
WORKER_LOG_LEVEL=INFO
|
|
126
|
+
HEARTBEAT_INTERVAL_SEC=30
|
|
127
|
+
WORKER_COST_PER_HOUR=1.0
|
|
128
|
+
# Directory/Docker volume for the server to look up task results after worker completion.
|
|
129
|
+
# Set to the same value as WORKER_RESULTS_DIR so the server can access worker results.
|
|
130
|
+
# For workflows with a local output destination (`spec.output.destination.type="local"`),
|
|
131
|
+
# `SERVER_RESULTS_DIR` and `WORKER_RESULTS_DIR` must point to the same shared directory
|
|
132
|
+
# or volume; otherwise, the server cannot read the worker's outputs and downstream tasks
|
|
133
|
+
# will stall in the dispatching loop.
|
|
134
|
+
# Defaults to the stack-scoped results volume when empty.
|
|
135
|
+
SERVER_RESULTS_DIR=
|
|
136
|
+
# Defaults to the stack-scoped results volume when empty.
|
|
137
|
+
WORKER_RESULTS_DIR=
|
|
138
|
+
HF_CACHE_DIR=
|
|
139
|
+
WORKER_NETWORK_BANDWIDTH_BYTES_PER_SEC=
|
|
140
|
+
WORKER_TAGS=
|
|
141
|
+
WORKER_HB_DIR=
|
|
142
|
+
FLOWMESH_BASE_URL=http://localhost:8000
|
|
143
|
+
# Supplier API key for worker authentication with the server
|
|
144
|
+
FLOWMESH_API_KEY=
|
|
145
|
+
NEBULA_API_BASE_URL=
|
|
146
|
+
# Server-side CUDA image used to probe local GPUs.
|
|
147
|
+
SERVER_CUDA_PROBE_IMAGE=nvidia/cuda:12.9.1-base-ubuntu24.04
|
|
148
|
+
# Optional Docker runtime name for GPU containers.
|
|
149
|
+
DOCKER_GPU_RUNTIME=nvidia
|
|
150
|
+
CUDA_VISIBLE_DEVICES=all
|
|
151
|
+
WORKER_UPLOAD_RESULTS=false
|
|
152
|
+
MODEL_CLEANUP_AFTER_UPLOAD=0
|
|
153
|
+
|
|
154
|
+
# ==== Model Pre-downloading ====
|
|
155
|
+
# Comma-separated list of models to pre-download during worker startup
|
|
156
|
+
# Leave empty to disable model pre-downloading
|
|
157
|
+
# Example: meta-llama/Llama-3.2-1B-Instruct,meta-llama/Llama-3.2-3B-Instruct
|
|
158
|
+
PREDOWNLOAD_MODEL_LIST=
|
|
159
|
+
|
|
160
|
+
# ==== API Keys injected into workers (optional) ====
|
|
161
|
+
OPENAI_API_KEY=
|
|
162
|
+
GOOGLE_API_KEY=
|
|
163
|
+
VAST_API_KEY=
|
|
164
|
+
HF_TOKEN=
|
|
165
|
+
NEBULA_API_TOKEN=
|
|
166
|
+
|
|
167
|
+
# ==== External Plugins ====
|
|
168
|
+
# Plugins are Python packages dropped under FLOWMESH_PLUGIN_DIR
|
|
169
|
+
# (host-mounted to /app/plugins on the server) and selected by
|
|
170
|
+
# FLOWMESH_PLUGINS as a comma-separated list of top-level module
|
|
171
|
+
# names. Each named module must expose `install()` returning a
|
|
172
|
+
# `HookBindings`. Leave FLOWMESH_PLUGINS empty unless you ship a plugin (an empty FLOWMESH_PLUGIN_DIR falls back to ./plugins).
|
|
173
|
+
FLOWMESH_PLUGIN_DIR=./plugins
|
|
174
|
+
FLOWMESH_PLUGINS=
|
|
175
|
+
|
|
176
|
+
# ==== Agent Executor (youtu-agent / utu) ====
|
|
177
|
+
# All four UTU_LLM_* are required for the agent executor to run.
|
|
178
|
+
# utu LLM provider kind, e.g. "chat.completions"
|
|
179
|
+
UTU_LLM_TYPE=
|
|
180
|
+
# utu model identifier, e.g. gpt-4o-mini
|
|
181
|
+
UTU_LLM_MODEL=
|
|
182
|
+
# utu LLM base URL
|
|
183
|
+
UTU_LLM_BASE_URL=
|
|
184
|
+
# utu LLM API key
|
|
185
|
+
UTU_LLM_API_KEY=
|
|
186
|
+
# Serper API key (optional, for agent search tools)
|
|
187
|
+
SERPER_API_KEY=
|
|
188
|
+
# Jina API key (optional, for agent search tools)
|
|
189
|
+
JINA_API_KEY=
|
|
190
|
+
# Database URL for agent tracing (optional)
|
|
191
|
+
DB_URL=
|
|
192
|
+
|
|
193
|
+
# ==== n8n Integration ====
|
|
194
|
+
# AES-GCM key to decrypt encrypted n8n credentials.
|
|
195
|
+
N8N_CREDENTIAL_AES_PASSWORD=
|
|
196
|
+
|
|
197
|
+
# ==== Worker launch config (optional) ====
|
|
198
|
+
SERVER_WORKER_CONFIG=./configs/worker_config.yaml
|
|
199
|
+
|
|
200
|
+
# ==== Logging ====
|
|
201
|
+
LOG_MAX_BYTES=5242880
|
|
202
|
+
LOG_BACKUP_COUNT=5
|
|
203
|
+
SERVER_APP_RELOAD=0
|
|
204
|
+
SERVER_APP_LOG_LEVEL=info
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
services:
|
|
2
|
+
redis_control:
|
|
3
|
+
image: redis:7-alpine
|
|
4
|
+
profiles: [root]
|
|
5
|
+
container_name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_redis_control
|
|
6
|
+
environment:
|
|
7
|
+
REDIS_ACL_ENABLED: ${REDIS_ACL_ENABLED:-0}
|
|
8
|
+
REDIS_USERNAME: ${REDIS_USERNAME:-admin}
|
|
9
|
+
REDIS_PASSWORD: ${REDIS_PASSWORD:-}
|
|
10
|
+
REDIS_TLS_CA_FILE: ${REDIS_TLS_CA_FILE:-}
|
|
11
|
+
REDIS_TLS_CERT_FILE: ${REDIS_TLS_CERT_FILE:-}
|
|
12
|
+
REDIS_TLS_KEY_FILE: ${REDIS_TLS_KEY_FILE:-}
|
|
13
|
+
command:
|
|
14
|
+
- /bin/sh
|
|
15
|
+
- -c
|
|
16
|
+
- |
|
|
17
|
+
if [ "${REDIS_ACL_ENABLED:-0}" = "1" ]; then
|
|
18
|
+
if [ -z "${REDIS_PASSWORD:-}" ]; then
|
|
19
|
+
echo "REDIS_PASSWORD required when REDIS_ACL_ENABLED=1" >&2
|
|
20
|
+
exit 1
|
|
21
|
+
fi
|
|
22
|
+
mkdir -p /etc/redis
|
|
23
|
+
printf "user default off\nuser %s on >%s ~* &* +@all\n" "$${REDIS_USERNAME:-admin}" "$${REDIS_PASSWORD}" > /etc/redis/users.acl
|
|
24
|
+
fi
|
|
25
|
+
if [ -n "${REDIS_TLS_CA_FILE:-}" ] || [ -n "${REDIS_TLS_CERT_FILE:-}" ] || [ -n "${REDIS_TLS_KEY_FILE:-}" ]; then
|
|
26
|
+
if [ -z "${REDIS_TLS_CA_FILE:-}" ] || [ -z "${REDIS_TLS_CERT_FILE:-}" ] || [ -z "${REDIS_TLS_KEY_FILE:-}" ]; then
|
|
27
|
+
echo "REDIS_TLS_CA_FILE, REDIS_TLS_CERT_FILE, REDIS_TLS_KEY_FILE are required to enable Redis TLS" >&2
|
|
28
|
+
exit 1
|
|
29
|
+
fi
|
|
30
|
+
TLS_ARGS="--tls-port 6379 --port 0 --tls-ca-cert-file ${REDIS_TLS_CA_FILE} --tls-cert-file ${REDIS_TLS_CERT_FILE} --tls-key-file ${REDIS_TLS_KEY_FILE} --tls-auth-clients no"
|
|
31
|
+
fi
|
|
32
|
+
if [ "${REDIS_ACL_ENABLED:-0}" = "1" ]; then
|
|
33
|
+
exec redis-server $${TLS_ARGS:-} --aclfile /etc/redis/users.acl --save 60 1 --loglevel warning --client-output-buffer-limit pubsub 1gb 512mb 60
|
|
34
|
+
else
|
|
35
|
+
exec redis-server $${TLS_ARGS:-} --save 60 1 --loglevel warning --client-output-buffer-limit pubsub 1gb 512mb 60
|
|
36
|
+
fi
|
|
37
|
+
ports:
|
|
38
|
+
- "${REDIS_CONTROL_PORT:-6379}:6379"
|
|
39
|
+
volumes:
|
|
40
|
+
- redis_control_data:/data
|
|
41
|
+
- ${REDIS_TLS_DIR:-./secrets/tls/redis}:/etc/ssl/redis:ro
|
|
42
|
+
restart: unless-stopped
|
|
43
|
+
networks:
|
|
44
|
+
- flowmesh_node_network
|
|
45
|
+
healthcheck:
|
|
46
|
+
test:
|
|
47
|
+
- CMD-SHELL
|
|
48
|
+
- |
|
|
49
|
+
TLS_ENABLED=0
|
|
50
|
+
if [ -n "${REDIS_TLS_CA_FILE:-}" ] && [ -n "${REDIS_TLS_CERT_FILE:-}" ] && [ -n "${REDIS_TLS_KEY_FILE:-}" ]; then
|
|
51
|
+
TLS_ENABLED=1
|
|
52
|
+
fi
|
|
53
|
+
if [ "${REDIS_ACL_ENABLED:-0}" = "1" ]; then
|
|
54
|
+
if [ "$${TLS_ENABLED}" = "1" ]; then
|
|
55
|
+
# Credentials are expanded inside the container ($$) and kept out of the URI, so passwords with URI-reserved or shell-special characters still work.
REDISCLI_AUTH="$${REDIS_PASSWORD}" redis-cli --tls --cacert "${REDIS_TLS_CA_FILE:-}" --user "$${REDIS_USERNAME:-admin}" -h localhost -p 6379 ping
|
|
56
|
+
else
|
|
57
|
+
# Credentials are expanded inside the container ($$) and kept out of the URI, so passwords with URI-reserved or shell-special characters still work.
REDISCLI_AUTH="$${REDIS_PASSWORD}" redis-cli --user "$${REDIS_USERNAME:-admin}" -h localhost -p 6379 ping
|
|
58
|
+
fi
|
|
59
|
+
else
|
|
60
|
+
if [ "$${TLS_ENABLED}" = "1" ]; then
|
|
61
|
+
redis-cli --tls --cacert "${REDIS_TLS_CA_FILE:-}" -p 6379 ping
|
|
62
|
+
else
|
|
63
|
+
redis-cli -p 6379 ping
|
|
64
|
+
fi
|
|
65
|
+
fi
|
|
66
|
+
interval: 5s
|
|
67
|
+
timeout: 3s
|
|
68
|
+
retries: 5
|
|
69
|
+
|
|
70
|
+
redis_telemetry:
|
|
71
|
+
image: redis:7-alpine
|
|
72
|
+
profiles: [root]
|
|
73
|
+
container_name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_redis_telemetry
|
|
74
|
+
environment:
|
|
75
|
+
REDIS_ACL_ENABLED: ${REDIS_ACL_ENABLED:-0}
|
|
76
|
+
REDIS_USERNAME: ${REDIS_USERNAME:-admin}
|
|
77
|
+
REDIS_PASSWORD: ${REDIS_PASSWORD:-}
|
|
78
|
+
REDIS_TLS_CA_FILE: ${REDIS_TLS_CA_FILE:-}
|
|
79
|
+
REDIS_TLS_CERT_FILE: ${REDIS_TLS_CERT_FILE:-}
|
|
80
|
+
REDIS_TLS_KEY_FILE: ${REDIS_TLS_KEY_FILE:-}
|
|
81
|
+
command:
|
|
82
|
+
- /bin/sh
|
|
83
|
+
- -c
|
|
84
|
+
- |
|
|
85
|
+
if [ "${REDIS_ACL_ENABLED:-0}" = "1" ]; then
|
|
86
|
+
if [ -z "${REDIS_PASSWORD:-}" ]; then
|
|
87
|
+
echo "REDIS_PASSWORD required when REDIS_ACL_ENABLED=1" >&2
|
|
88
|
+
exit 1
|
|
89
|
+
fi
|
|
90
|
+
mkdir -p /etc/redis
|
|
91
|
+
printf "user default off\nuser %s on >%s ~* &* +@all\n" "$${REDIS_USERNAME:-admin}" "$${REDIS_PASSWORD}" > /etc/redis/users.acl
|
|
92
|
+
fi
|
|
93
|
+
if [ -n "${REDIS_TLS_CA_FILE:-}" ] || [ -n "${REDIS_TLS_CERT_FILE:-}" ] || [ -n "${REDIS_TLS_KEY_FILE:-}" ]; then
|
|
94
|
+
if [ -z "${REDIS_TLS_CA_FILE:-}" ] || [ -z "${REDIS_TLS_CERT_FILE:-}" ] || [ -z "${REDIS_TLS_KEY_FILE:-}" ]; then
|
|
95
|
+
echo "REDIS_TLS_CA_FILE, REDIS_TLS_CERT_FILE, REDIS_TLS_KEY_FILE are required to enable Redis TLS" >&2
|
|
96
|
+
exit 1
|
|
97
|
+
fi
|
|
98
|
+
TLS_ARGS="--tls-port 6379 --port 0 --tls-ca-cert-file ${REDIS_TLS_CA_FILE} --tls-cert-file ${REDIS_TLS_CERT_FILE} --tls-key-file ${REDIS_TLS_KEY_FILE} --tls-auth-clients no"
|
|
99
|
+
fi
|
|
100
|
+
if [ "${REDIS_ACL_ENABLED:-0}" = "1" ]; then
|
|
101
|
+
exec redis-server $${TLS_ARGS:-} --aclfile /etc/redis/users.acl --save 300 1 --loglevel warning
|
|
102
|
+
else
|
|
103
|
+
exec redis-server $${TLS_ARGS:-} --save 300 1 --loglevel warning
|
|
104
|
+
fi
|
|
105
|
+
ports:
|
|
106
|
+
- "${REDIS_TELEMETRY_PORT:-6380}:6379"
|
|
107
|
+
volumes:
|
|
108
|
+
- redis_telemetry_data:/data
|
|
109
|
+
- ${REDIS_TLS_DIR:-./secrets/tls/redis}:/etc/ssl/redis:ro
|
|
110
|
+
restart: unless-stopped
|
|
111
|
+
networks:
|
|
112
|
+
- flowmesh_node_network
|
|
113
|
+
healthcheck:
|
|
114
|
+
test:
|
|
115
|
+
- CMD-SHELL
|
|
116
|
+
- |
|
|
117
|
+
TLS_ENABLED=0
|
|
118
|
+
if [ -n "${REDIS_TLS_CA_FILE:-}" ] && [ -n "${REDIS_TLS_CERT_FILE:-}" ] && [ -n "${REDIS_TLS_KEY_FILE:-}" ]; then
|
|
119
|
+
TLS_ENABLED=1
|
|
120
|
+
fi
|
|
121
|
+
if [ "${REDIS_ACL_ENABLED:-0}" = "1" ]; then
|
|
122
|
+
if [ "$${TLS_ENABLED}" = "1" ]; then
|
|
123
|
+
# Credentials are expanded inside the container ($$) and kept out of the URI, so passwords with URI-reserved or shell-special characters still work.
REDISCLI_AUTH="$${REDIS_PASSWORD}" redis-cli --tls --cacert "${REDIS_TLS_CA_FILE:-}" --user "$${REDIS_USERNAME:-admin}" -h localhost -p 6379 ping
|
|
124
|
+
else
|
|
125
|
+
# Credentials are expanded inside the container ($$) and kept out of the URI, so passwords with URI-reserved or shell-special characters still work.
REDISCLI_AUTH="$${REDIS_PASSWORD}" redis-cli --user "$${REDIS_USERNAME:-admin}" -h localhost -p 6379 ping
|
|
126
|
+
fi
|
|
127
|
+
else
|
|
128
|
+
if [ "$${TLS_ENABLED}" = "1" ]; then
|
|
129
|
+
redis-cli --tls --cacert "${REDIS_TLS_CA_FILE:-}" -p 6379 ping
|
|
130
|
+
else
|
|
131
|
+
redis-cli -p 6379 ping
|
|
132
|
+
fi
|
|
133
|
+
fi
|
|
134
|
+
interval: 5s
|
|
135
|
+
timeout: 3s
|
|
136
|
+
retries: 5
|
|
137
|
+
|
|
138
|
+
server:
|
|
139
|
+
image: ${FLOWMESH_REGISTRY:-ghcr.io/mlsys-io}/flowmesh_server:${FLOWMESH_VERSION:-dev}
|
|
140
|
+
container_name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_server
|
|
141
|
+
depends_on:
|
|
142
|
+
redis_control:
|
|
143
|
+
condition: service_healthy
|
|
144
|
+
required: false
|
|
145
|
+
redis_telemetry:
|
|
146
|
+
condition: service_healthy
|
|
147
|
+
required: false
|
|
148
|
+
env_file:
|
|
149
|
+
- ${STACK_ENV_FILE:-./.env}
|
|
150
|
+
environment:
|
|
151
|
+
REDIS_CONTROL_URL: ${REDIS_CONTROL_URL:-redis://localhost:${REDIS_CONTROL_PORT:-6379}/0}
|
|
152
|
+
REDIS_TELEMETRY_URL: ${REDIS_TELEMETRY_URL:-redis://localhost:${REDIS_TELEMETRY_PORT:-6380}/0}
|
|
153
|
+
SERVER_METRICS_DIR: "/mnt/flowmesh-metrics"
|
|
154
|
+
LOG_FILE: "/var/log/flowmesh-server/server.log"
|
|
155
|
+
WORKER_CONFIG_PATH: /etc/flowmesh/worker_config.yaml
|
|
156
|
+
RESULTS_DIR: "/mnt/flowmesh-results"
|
|
157
|
+
WORKER_RESULTS_DIR: ${WORKER_RESULTS_DIR:-${FLOWMESH_STACK_SLUG:-flowmesh_node}_results}
|
|
158
|
+
SERVER_APP_PORT: ${SERVER_HTTP_PORT:-8000}
|
|
159
|
+
FLOWMESH_BASE_URL: ${FLOWMESH_BASE_URL:-http://localhost:${SERVER_HTTP_PORT:-8000}}
|
|
160
|
+
volumes:
|
|
161
|
+
# Bare `flowmesh_results` is the compose volume key below; it resolves to
|
|
162
|
+
# the slug-scoped Docker volume name when SERVER_RESULTS_DIR is empty.
|
|
163
|
+
- ${SERVER_RESULTS_DIR:-flowmesh_results}:/mnt/flowmesh-results
|
|
164
|
+
- flowmesh_metrics:/mnt/flowmesh-metrics
|
|
165
|
+
- flowmesh_server_logs:/var/log/flowmesh-server
|
|
166
|
+
- /var/run/docker.sock:/var/run/docker.sock
|
|
167
|
+
- ${SERVER_WORKER_CONFIG:-./configs/worker_config.yaml}:/etc/flowmesh/worker_config.yaml:ro
|
|
168
|
+
- ${FLOWMESH_PLUGIN_DIR:-./plugins}:/app/plugins:ro
|
|
169
|
+
- ${REDIS_TLS_DIR:-./secrets/tls/redis}:/etc/ssl/redis:ro
|
|
170
|
+
- ${SERVER_TLS_DIR:-./secrets/tls/server}:/etc/ssl/server:ro
|
|
171
|
+
restart: unless-stopped
|
|
172
|
+
network_mode: host
|
|
173
|
+
healthcheck:
|
|
174
|
+
test:
|
|
175
|
+
[
|
|
176
|
+
"CMD",
|
|
177
|
+
"curl",
|
|
178
|
+
"-sf",
|
|
179
|
+
"http://localhost:${SERVER_HTTP_PORT:-8000}/healthz",
|
|
180
|
+
]
|
|
181
|
+
interval: 30s
|
|
182
|
+
timeout: 10s
|
|
183
|
+
retries: 3
|
|
184
|
+
start_period: 40s
|
|
185
|
+
|
|
186
|
+
networks:
|
|
187
|
+
flowmesh_node_network:
|
|
188
|
+
name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_network
|
|
189
|
+
driver: bridge
|
|
190
|
+
|
|
191
|
+
volumes:
|
|
192
|
+
redis_control_data:
|
|
193
|
+
name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_redis_control_data
|
|
194
|
+
redis_telemetry_data:
|
|
195
|
+
name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_redis_telemetry_data
|
|
196
|
+
flowmesh_results:
|
|
197
|
+
name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_results
|
|
198
|
+
flowmesh_metrics:
|
|
199
|
+
name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_metrics
|
|
200
|
+
flowmesh_server_logs:
|
|
201
|
+
name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_server_logs
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
variable "REGISTRY" {
|
|
2
|
+
default = "ghcr.io/mlsys-io"
|
|
3
|
+
}
|
|
4
|
+
|
|
5
|
+
variable "VERSION" {
|
|
6
|
+
default = "dev"
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
variable "BUILD_REF" {
|
|
10
|
+
default = "local"
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
variable "BUILD_CREATED" {
|
|
14
|
+
default = "unknown"
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
group "server" {
|
|
18
|
+
targets = ["flowmesh_server"]
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
group "workers" {
|
|
22
|
+
targets = [
|
|
23
|
+
"flowmesh_worker_cpu",
|
|
24
|
+
"flowmesh_worker_gpu",
|
|
25
|
+
"flowmesh_ssh_cpu",
|
|
26
|
+
"flowmesh_ssh_gpu",
|
|
27
|
+
]
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
group "builders" {
|
|
31
|
+
targets = [
|
|
32
|
+
"flowmesh_worker_gpu_builder",
|
|
33
|
+
]
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
group "default" {
|
|
37
|
+
targets = concat(
|
|
38
|
+
group.server.targets,
|
|
39
|
+
group.workers.targets,
|
|
40
|
+
)
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
target "flowmesh_server" {
|
|
44
|
+
context = "."
|
|
45
|
+
dockerfile = "src/server/Dockerfile"
|
|
46
|
+
args = {
|
|
47
|
+
BUILD_VERSION = "${VERSION}"
|
|
48
|
+
BUILD_REF = "${BUILD_REF}"
|
|
49
|
+
BUILD_CREATED = "${BUILD_CREATED}"
|
|
50
|
+
}
|
|
51
|
+
tags = ["${REGISTRY}/flowmesh_server:${VERSION}"]
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
target "flowmesh_worker_cpu" {
|
|
55
|
+
context = "."
|
|
56
|
+
dockerfile = "src/worker/docker/Dockerfile.cpu"
|
|
57
|
+
args = {
|
|
58
|
+
BUILD_VERSION = "${VERSION}"
|
|
59
|
+
BUILD_REF = "${BUILD_REF}"
|
|
60
|
+
BUILD_CREATED = "${BUILD_CREATED}"
|
|
61
|
+
}
|
|
62
|
+
tags = ["${REGISTRY}/flowmesh_worker:${VERSION}-cpu"]
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
target "flowmesh_worker_gpu_builder" {
|
|
66
|
+
context = "."
|
|
67
|
+
dockerfile = "src/worker/docker/Dockerfile.cuda.builder"
|
|
68
|
+
args = {
|
|
69
|
+
BUILD_VERSION = "${VERSION}"
|
|
70
|
+
BUILD_REF = "${BUILD_REF}"
|
|
71
|
+
BUILD_CREATED = "${BUILD_CREATED}"
|
|
72
|
+
}
|
|
73
|
+
tags = ["${REGISTRY}/flowmesh_worker_builder:${VERSION}-gpu"]
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
target "flowmesh_worker_gpu" {
|
|
77
|
+
context = "."
|
|
78
|
+
dockerfile = "src/worker/docker/Dockerfile.cuda"
|
|
79
|
+
contexts = {
|
|
80
|
+
builder = "target:flowmesh_worker_gpu_builder"
|
|
81
|
+
}
|
|
82
|
+
args = {
|
|
83
|
+
BUILD_VERSION = "${VERSION}"
|
|
84
|
+
BUILD_REF = "${BUILD_REF}"
|
|
85
|
+
BUILD_CREATED = "${BUILD_CREATED}"
|
|
86
|
+
}
|
|
87
|
+
tags = ["${REGISTRY}/flowmesh_worker:${VERSION}-gpu"]
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
target "flowmesh_ssh_cpu" {
|
|
91
|
+
context = "."
|
|
92
|
+
dockerfile = "src/worker/docker/Dockerfile.ssh.cpu"
|
|
93
|
+
args = {
|
|
94
|
+
BUILD_VERSION = "${VERSION}"
|
|
95
|
+
BUILD_REF = "${BUILD_REF}"
|
|
96
|
+
BUILD_CREATED = "${BUILD_CREATED}"
|
|
97
|
+
}
|
|
98
|
+
tags = ["${REGISTRY}/flowmesh_ssh:${VERSION}-cpu"]
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
target "flowmesh_ssh_gpu" {
|
|
102
|
+
context = "."
|
|
103
|
+
dockerfile = "src/worker/docker/Dockerfile.ssh.gpu"
|
|
104
|
+
args = {
|
|
105
|
+
BUILD_VERSION = "${VERSION}"
|
|
106
|
+
BUILD_REF = "${BUILD_REF}"
|
|
107
|
+
BUILD_CREATED = "${BUILD_CREATED}"
|
|
108
|
+
}
|
|
109
|
+
tags = ["${REGISTRY}/flowmesh_ssh:${VERSION}-gpu"]
|
|
110
|
+
}
|