sbcli-pre 1.3.5__zip → 1.3.7__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/PKG-INFO +1 -1
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/env_var +2 -1
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/sbcli_pre.egg-info/PKG-INFO +1 -1
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/sbcli_pre.egg-info/SOURCES.txt +2 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/cluster_ops.py +0 -26
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/device_controller.py +17 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/health_controller.py +1 -1
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/tasks_controller.py +21 -12
- sbcli_pre-1.3.7/simplyblock_core/scripts/dashboard.yml +12 -0
- sbcli_pre-1.3.7/simplyblock_core/scripts/dashboards/cluster.json +2355 -0
- sbcli_pre-1.3.7/simplyblock_core/scripts/dashboards/devices.json +2436 -0
- sbcli_pre-1.3.7/simplyblock_core/scripts/dashboards/lvols.json +2441 -0
- sbcli_pre-1.3.7/simplyblock_core/scripts/dashboards/node-exporter.json +23743 -0
- sbcli_pre-1.3.7/simplyblock_core/scripts/dashboards/nodes.json +2434 -0
- sbcli_pre-1.3.7/simplyblock_core/scripts/dashboards/pools.json +2399 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/datasource.yml +3 -2
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/deploy_stack.sh +0 -7
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +79 -8
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/install_deps.sh +0 -2
- sbcli_pre-1.3.7/simplyblock_core/scripts/objstore.yml +3 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/distr_event_collector.py +2 -2
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/health_check_service.py +1 -1
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/tasks_runner_restart.py +14 -1
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/storage_node_ops.py +29 -11
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/snode_ops.py +2 -2
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_deployer.py +3 -3
- sbcli_pre-1.3.5/simplyblock_core/scripts/dashboards/cluster.json +0 -2357
- sbcli_pre-1.3.5/simplyblock_core/scripts/dashboards/devices.json +0 -2438
- sbcli_pre-1.3.5/simplyblock_core/scripts/dashboards/lvols.json +0 -2443
- sbcli_pre-1.3.5/simplyblock_core/scripts/dashboards/node-exporter.json +0 -23745
- sbcli_pre-1.3.5/simplyblock_core/scripts/dashboards/nodes.json +0 -2436
- sbcli_pre-1.3.5/simplyblock_core/scripts/dashboards/pools.json +0 -2401
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/README.md +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/pyproject.toml +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/sbcli_pre.egg-info/dependency_links.txt +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/sbcli_pre.egg-info/entry_points.txt +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/sbcli_pre.egg-info/requires.txt +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/sbcli_pre.egg-info/top_level.txt +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/setup.cfg +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/setup.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_cli/cli.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_cli/main.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/__init__.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/cnode_client.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/compute_node_ops.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/constants.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/__init__.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/caching_node_controller.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/cluster_events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/device_events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/events_controller.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/lvol_controller.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/lvol_events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/mgmt_events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/pool_controller.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/pool_events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/snapshot_controller.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/snapshot_events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/storage_events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/controllers/tasks_events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/distr_controller.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/kv_store.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/mgmt_node_ops.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/__init__.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/base_model.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/caching_node.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/cluster.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/compute_node.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/deployer.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/events.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/global_settings.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/iface.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/job_schedule.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/lvol_model.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/mgmt_node.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/nvme_device.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/pool.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/port_stat.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/snapshot.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/stats.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/models/storage_node.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/pci_utils.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/rpc_client.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/__init__.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/config_docker.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/db_config_double.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/db_config_single.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/docker-compose-swarm.yml +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/haproxy.cfg +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/prometheus.yml +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/run_ssh.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/set_db_config.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/stack_deploy_wait.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/__init__.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/caching_node_monitor.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/cap_monitor.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/capacity_and_stats_collector.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/device_monitor.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/install_service.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/log_agg_service.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/lvol_monitor.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/lvol_stat_collector.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/port_stat_collector.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/remove_service.sh +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/service_template.service +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/storage_node_monitor.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/services/tasks_runner_migration.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/shell_utils.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/snode_client.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/utils.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/__init__.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/app.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/auth_middleware.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/__init__.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/node_api_basic.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_cluster.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_device.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_lvol.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_pool.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/blueprints/web_api_storage_node.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/caching_node_app.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/caching_node_app_k8s.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/node_utils.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/node_webapp.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/snode_app.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/static/delete.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/static/deploy.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/static/deploy_cnode.yaml +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/static/deploy_spdk.yaml +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/static/is_up.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/static/list_deps.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/static/rpac.yaml +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/static/tst.py +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
- {sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_web/utils.py +0 -0
@@ -1,10 +1,11 @@
|
|
1
1
|
apiVersion: 1
|
2
2
|
datasources:
|
3
|
-
- name:
|
3
|
+
- name: Thanos
|
4
4
|
type: prometheus
|
5
|
-
url: http://
|
5
|
+
url: http://thanos-query:9091
|
6
6
|
isDefault: true
|
7
7
|
access: proxy
|
8
|
+
uid: PBFA97CFB590B2093
|
8
9
|
editable: true
|
9
10
|
- name: GRAYLOG
|
10
11
|
type: elasticsearch
|
@@ -21,13 +21,6 @@ fi
|
|
21
21
|
|
22
22
|
docker network create monitoring-net -d overlay --attachable
|
23
23
|
|
24
|
-
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
|
25
|
-
|
26
|
-
#if [ -n "$INSTANCE_ID" ]
|
27
|
-
#then
|
28
|
-
# export USE_EFS="rexray/efs"
|
29
|
-
#fi
|
30
|
-
|
31
24
|
docker stack deploy --compose-file="$DIR"/docker-compose-swarm-monitoring.yml monitoring
|
32
25
|
|
33
26
|
# wait for the services to become online
|
{sbcli_pre-1.3.5 → sbcli_pre-1.3.7}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml
RENAMED
@@ -68,6 +68,7 @@ services:
|
|
68
68
|
ClusterIP: "HAProxy"
|
69
69
|
ClusterSecret: "${CLUSTER_SECRET}"
|
70
70
|
deploy:
|
71
|
+
mode: global
|
71
72
|
placement:
|
72
73
|
constraints: [node.role == manager]
|
73
74
|
networks:
|
@@ -76,6 +77,7 @@ services:
|
|
76
77
|
pushgateway:
|
77
78
|
image: prom/pushgateway
|
78
79
|
deploy:
|
80
|
+
mode: global
|
79
81
|
placement:
|
80
82
|
constraints: [node.role == manager]
|
81
83
|
networks:
|
@@ -90,9 +92,82 @@ services:
|
|
90
92
|
command:
|
91
93
|
- "--config.file=/etc/prometheus/prometheus.yml"
|
92
94
|
- "--storage.tsdb.path=/prometheus"
|
95
|
+
- "--storage.tsdb.no-lockfile"
|
93
96
|
- "--storage.tsdb.retention.time=${RETENTION_PERIOD}"
|
97
|
+
- "--storage.tsdb.min-block-duration=2h"
|
98
|
+
- "--storage.tsdb.max-block-duration=2h"
|
94
99
|
restart: "always"
|
95
100
|
deploy:
|
101
|
+
mode: global
|
102
|
+
placement:
|
103
|
+
constraints: [node.role == manager]
|
104
|
+
networks:
|
105
|
+
- monitoring-net
|
106
|
+
|
107
|
+
thanos-sidecar:
|
108
|
+
image: thanosio/thanos:v0.31.0
|
109
|
+
user: root
|
110
|
+
command:
|
111
|
+
- sidecar
|
112
|
+
- --tsdb.path=/prometheus
|
113
|
+
- --prometheus.url=http://prometheus:9090
|
114
|
+
- --objstore.config-file=/etc/thanos/objstore.yml
|
115
|
+
volumes:
|
116
|
+
- prometheus_data:/prometheus
|
117
|
+
- ./objstore.yml:/etc/thanos/objstore.yml
|
118
|
+
deploy:
|
119
|
+
mode: global
|
120
|
+
placement:
|
121
|
+
constraints: [node.role == manager]
|
122
|
+
networks:
|
123
|
+
- monitoring-net
|
124
|
+
|
125
|
+
thanos-store:
|
126
|
+
image: thanosio/thanos:v0.31.0
|
127
|
+
command:
|
128
|
+
- store
|
129
|
+
- --objstore.config-file=/etc/thanos/objstore.yml
|
130
|
+
- --index-cache-size=500MB
|
131
|
+
- --chunk-pool-size=500MB
|
132
|
+
volumes:
|
133
|
+
- ./objstore.yml:/etc/thanos/objstore.yml
|
134
|
+
deploy:
|
135
|
+
mode: global
|
136
|
+
placement:
|
137
|
+
constraints: [node.role == manager]
|
138
|
+
networks:
|
139
|
+
- monitoring-net
|
140
|
+
|
141
|
+
thanos-query:
|
142
|
+
image: thanosio/thanos:v0.31.0
|
143
|
+
command:
|
144
|
+
- query
|
145
|
+
- --http-address=0.0.0.0:9091
|
146
|
+
- --store=thanos-store:10901
|
147
|
+
- --store=thanos-sidecar:10901
|
148
|
+
deploy:
|
149
|
+
mode: global
|
150
|
+
placement:
|
151
|
+
constraints: [node.role == manager]
|
152
|
+
networks:
|
153
|
+
- monitoring-net
|
154
|
+
|
155
|
+
thanos-compactor:
|
156
|
+
image: thanosio/thanos:v0.31.0
|
157
|
+
command:
|
158
|
+
- compact
|
159
|
+
- --data-dir=/data
|
160
|
+
- --objstore.config-file=/etc/thanos/objstore.yml
|
161
|
+
- --retention.resolution-raw=30d
|
162
|
+
- --retention.resolution-5m=60d
|
163
|
+
- --retention.resolution-1h=90d
|
164
|
+
- --compact.concurrency=1
|
165
|
+
- --wait
|
166
|
+
volumes:
|
167
|
+
- ./objstore.yml:/etc/thanos/objstore.yml
|
168
|
+
- thanos_compactor_data:/data
|
169
|
+
deploy:
|
170
|
+
mode: global
|
96
171
|
placement:
|
97
172
|
constraints: [node.role == manager]
|
98
173
|
networks:
|
@@ -133,6 +208,8 @@ services:
|
|
133
208
|
- ./datasource.yml:/etc/grafana/provisioning/datasources/datasource.yaml
|
134
209
|
- grafana_data:/var/lib/grafana
|
135
210
|
- ./alerting:/etc/grafana/provisioning/alerting
|
211
|
+
- ./dashboard.yml:/etc/grafana/provisioning/dashboards/main.yaml
|
212
|
+
- ./dashboards:/var/lib/grafana/dashboards
|
136
213
|
deploy:
|
137
214
|
placement:
|
138
215
|
constraints: [node.role == manager]
|
@@ -152,19 +229,13 @@ services:
|
|
152
229
|
|
153
230
|
volumes:
|
154
231
|
mongodb_data:
|
155
|
-
driver: ${USE_EFS:-local}
|
156
232
|
os_data:
|
157
|
-
driver: ${USE_EFS:-local}
|
158
233
|
graylog_data:
|
159
|
-
driver: ${USE_EFS:-local}
|
160
234
|
graylog_journal:
|
161
|
-
driver: ${USE_EFS:-local}
|
162
235
|
grafana_data:
|
163
|
-
|
164
|
-
graylog_config:
|
165
|
-
driver: ${USE_EFS:-local}
|
236
|
+
graylog_config:
|
166
237
|
prometheus_data:
|
167
|
-
|
238
|
+
thanos_compactor_data:
|
168
239
|
|
169
240
|
networks:
|
170
241
|
monitoring-net:
|
@@ -15,8 +15,6 @@ sudo yum install hostname pkg-config git wget python3-pip yum-utils docker-ce do
|
|
15
15
|
sudo systemctl enable docker
|
16
16
|
sudo systemctl start docker
|
17
17
|
|
18
|
-
#sudo docker plugin install rexray/efs --grant-all-permissions EFS_TAG=$INSTANCE_ID REXRAY_PREEMPT=true
|
19
|
-
|
20
18
|
wget https://github.com/apple/foundationdb/releases/download/7.3.3/foundationdb-clients-7.3.3-1.el7.x86_64.rpm -q
|
21
19
|
sudo rpm -U foundationdb-clients-7.3.3-1.el7.x86_64.rpm --quiet --reinstall
|
22
20
|
rm -f foundationdb-clients-7.3.3-1.el7.x86_64.rpm
|
@@ -160,8 +160,8 @@ while True:
|
|
160
160
|
logger.info(f"Processing event: {eid}")
|
161
161
|
process_event(eid)
|
162
162
|
|
163
|
-
|
164
|
-
|
163
|
+
logger.info(f"Discarding events: {len(events)}")
|
164
|
+
client.distr_status_events_discard_then_get(len(events), 0)
|
165
165
|
|
166
166
|
except Exception as e:
|
167
167
|
logger.error("Failed to process distr events")
|
@@ -125,7 +125,7 @@ while True:
|
|
125
125
|
logger.info(f"Checking bdev: {remote_device.remote_bdev} ... ok")
|
126
126
|
else:
|
127
127
|
logger.info(f"Checking bdev: {remote_device.remote_bdev} ... not found")
|
128
|
-
node_remote_devices_check &= bool(ret)
|
128
|
+
# node_remote_devices_check &= bool(ret)
|
129
129
|
|
130
130
|
health_check_status = is_node_online and node_devices_check and node_remote_devices_check
|
131
131
|
set_node_health_check(snode, health_check_status)
|
@@ -5,7 +5,7 @@ import sys
|
|
5
5
|
|
6
6
|
|
7
7
|
from simplyblock_core import constants, kv_store, storage_node_ops
|
8
|
-
from simplyblock_core.controllers import device_controller, tasks_events
|
8
|
+
from simplyblock_core.controllers import device_controller, tasks_events, health_controller
|
9
9
|
from simplyblock_core.models.job_schedule import JobSchedule
|
10
10
|
from simplyblock_core.models.nvme_device import NVMeDevice
|
11
11
|
|
@@ -169,6 +169,19 @@ def task_runner_node(task):
|
|
169
169
|
task.write_to_db(db_controller.kv_store)
|
170
170
|
tasks_events.task_updated(task)
|
171
171
|
|
172
|
+
# is node reachable?
|
173
|
+
ping_check = health_controller._check_node_ping(node.mgmt_ip)
|
174
|
+
logger.info(f"Check: ping mgmt ip {node.mgmt_ip} ... {ping_check}")
|
175
|
+
node_api_check = health_controller._check_node_api(node.mgmt_ip)
|
176
|
+
logger.info(f"Check: node API {node.mgmt_ip}:5000 ... {node_api_check}")
|
177
|
+
if not ping_check or not node_api_check:
|
178
|
+
# node is unreachable, retry
|
179
|
+
logger.info(f"Node is not reachable: {task.node_id}, retry")
|
180
|
+
task.function_result = f"Node is unreachable, retry"
|
181
|
+
task.retry += 1
|
182
|
+
task.write_to_db(db_controller.kv_store)
|
183
|
+
return False
|
184
|
+
|
172
185
|
# shutting down node
|
173
186
|
logger.info(f"Shutdown node {node.get_id()}")
|
174
187
|
ret = storage_node_ops.shutdown_storage_node(node.get_id(), force=True)
|
@@ -973,6 +973,12 @@ def remove_storage_node(node_id, force_remove=False, force_migrate=False):
|
|
973
973
|
logger.error(f"Can not remove online node: {node_id}")
|
974
974
|
return False
|
975
975
|
|
976
|
+
task_id = tasks_controller.get_active_node_restart_task(snode.cluster_id, snode.get_id())
|
977
|
+
if task_id:
|
978
|
+
logger.error(f"Restart task found: {task_id}, can not remove storage node")
|
979
|
+
if force_remove is False:
|
980
|
+
return False
|
981
|
+
|
976
982
|
if snode.lvols:
|
977
983
|
if force_migrate:
|
978
984
|
for lvol_id in snode.lvols:
|
@@ -1064,6 +1070,11 @@ def restart_storage_node(
|
|
1064
1070
|
logger.error(f"Can not restart online node: {node_id}")
|
1065
1071
|
return False
|
1066
1072
|
|
1073
|
+
task_id = tasks_controller.get_active_node_restart_task(snode.cluster_id, snode.get_id())
|
1074
|
+
if task_id:
|
1075
|
+
logger.error(f"Restart task found: {task_id}, can not restart storage node")
|
1076
|
+
return False
|
1077
|
+
|
1067
1078
|
logger.info("Setting node state to restarting")
|
1068
1079
|
old_status = snode.status
|
1069
1080
|
snode.status = StorageNode.STATUS_RESTARTING
|
@@ -1469,16 +1480,11 @@ def shutdown_storage_node(node_id, force=False):
|
|
1469
1480
|
if force is False:
|
1470
1481
|
return False
|
1471
1482
|
|
1472
|
-
|
1473
|
-
|
1474
|
-
|
1475
|
-
|
1476
|
-
|
1477
|
-
# online_nodes += 1
|
1478
|
-
# if cls[0].ha_type == "ha" and online_nodes <= 3:
|
1479
|
-
# logger.warning(f"Cluster mode is HA but online storage nodes are less than 3")
|
1480
|
-
# if force is False:
|
1481
|
-
# return False
|
1483
|
+
task_id = tasks_controller.get_active_node_restart_task(snode.cluster_id, snode.get_id())
|
1484
|
+
if task_id:
|
1485
|
+
logger.error(f"Restart task found: {task_id}, can not shutdown storage node")
|
1486
|
+
if force is False:
|
1487
|
+
return False
|
1482
1488
|
|
1483
1489
|
logger.info("Shutting down node")
|
1484
1490
|
old_status = snode.status
|
@@ -1538,7 +1544,14 @@ def suspend_storage_node(node_id, force=False):
|
|
1538
1544
|
logger.info("Node found: %s in state: %s", snode.hostname, snode.status)
|
1539
1545
|
if snode.status != StorageNode.STATUS_ONLINE:
|
1540
1546
|
logger.error("Node is not in online state")
|
1541
|
-
|
1547
|
+
if force is False:
|
1548
|
+
return False
|
1549
|
+
|
1550
|
+
task_id = tasks_controller.get_active_node_restart_task(snode.cluster_id, snode.get_id())
|
1551
|
+
if task_id:
|
1552
|
+
logger.error(f"Restart task found: {task_id}, can not suspend storage node")
|
1553
|
+
if force is False:
|
1554
|
+
return False
|
1542
1555
|
|
1543
1556
|
cluster = db_controller.get_cluster_by_id(snode.cluster_id)
|
1544
1557
|
snodes = db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id)
|
@@ -1599,6 +1612,11 @@ def resume_storage_node(node_id):
|
|
1599
1612
|
logger.error("Node is not in suspended state")
|
1600
1613
|
return False
|
1601
1614
|
|
1615
|
+
task_id = tasks_controller.get_active_node_restart_task(snode.cluster_id, snode.get_id())
|
1616
|
+
if task_id:
|
1617
|
+
logger.error(f"Restart task found: {task_id}, can not resume storage node")
|
1618
|
+
return False
|
1619
|
+
|
1602
1620
|
logger.info("Resuming node")
|
1603
1621
|
|
1604
1622
|
logger.info("Sending cluster event updates")
|
@@ -100,9 +100,9 @@ def spdk_process_start():
|
|
100
100
|
node.remove(force=True)
|
101
101
|
time.sleep(2)
|
102
102
|
|
103
|
-
spdk_debug =
|
103
|
+
spdk_debug = ""
|
104
104
|
if set_debug:
|
105
|
-
spdk_debug = 1
|
105
|
+
spdk_debug = "1"
|
106
106
|
|
107
107
|
spdk_image = constants.SIMPLY_BLOCK_SPDK_ULTRA_IMAGE
|
108
108
|
if 'spdk_image' in data and data['spdk_image']:
|
@@ -30,8 +30,8 @@ ssm = boto3.client('ssm', region_name=region)
|
|
30
30
|
s3 = boto3.client('s3', region_name=region)
|
31
31
|
|
32
32
|
|
33
|
-
def get_instance_tf_engine_instance_id():
|
34
|
-
tag_value = 'tfengine'
|
33
|
+
def get_instance_tf_engine_instance_id(workspace: str):
|
34
|
+
tag_value = f'{workspace}-tfengine'
|
35
35
|
tag_key = 'Name'
|
36
36
|
|
37
37
|
ec2 = boto3.client('ec2', region_name=region)
|
@@ -145,7 +145,7 @@ def update_cluster(d, kv_store, storage_nodes, availability_zone):
|
|
145
145
|
d.status = "in_progress"
|
146
146
|
d.write_to_db(kv_store)
|
147
147
|
|
148
|
-
instance_ids = get_instance_tf_engine_instance_id()
|
148
|
+
instance_ids = get_instance_tf_engine_instance_id(d.tf_workspace)
|
149
149
|
if len(instance_ids) == 0:
|
150
150
|
# wait for a min and try again before returning error on the API
|
151
151
|
print('no instance IDs')
|