k8s-helper-cli 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k8s_helper/__init__.py +1 -1
- k8s_helper/cli.py +38 -1
- k8s_helper/core.py +450 -21
- {k8s_helper_cli-0.4.2.dist-info → k8s_helper_cli-0.5.0.dist-info}/METADATA +1 -1
- k8s_helper_cli-0.5.0.dist-info/RECORD +11 -0
- k8s_helper_cli-0.4.2.dist-info/RECORD +0 -11
- {k8s_helper_cli-0.4.2.dist-info → k8s_helper_cli-0.5.0.dist-info}/WHEEL +0 -0
- {k8s_helper_cli-0.4.2.dist-info → k8s_helper_cli-0.5.0.dist-info}/entry_points.txt +0 -0
- {k8s_helper_cli-0.4.2.dist-info → k8s_helper_cli-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {k8s_helper_cli-0.4.2.dist-info → k8s_helper_cli-0.5.0.dist-info}/top_level.txt +0 -0
k8s_helper/__init__.py
CHANGED
k8s_helper/cli.py
CHANGED
@@ -1359,6 +1359,7 @@ def setup_monitoring(
|
|
1359
1359
|
namespace: str = typer.Option("monitoring", "--namespace", "-n", help="Namespace for monitoring stack"),
|
1360
1360
|
grafana_service_type: str = typer.Option("NodePort", "--service-type", "-t", help="Grafana service type: NodePort, LoadBalancer, ClusterIP"),
|
1361
1361
|
import_dashboard: bool = typer.Option(True, "--import-dashboard/--no-dashboard", help="Import default Kubernetes dashboard"),
|
1362
|
+
install_kube_state_metrics: bool = typer.Option(True, "--install-kube-state-metrics/--no-kube-state-metrics", help="Install kube-state-metrics for cluster metrics"),
|
1362
1363
|
wait: bool = typer.Option(True, "--wait/--no-wait", help="Wait for deployments to be ready"),
|
1363
1364
|
show_info: bool = typer.Option(True, "--show-info/--no-show-info", help="Show monitoring stack information after setup")
|
1364
1365
|
):
|
@@ -1377,12 +1378,17 @@ def setup_monitoring(
|
|
1377
1378
|
if import_dashboard:
|
1378
1379
|
console.print("📊 Will import default Kubernetes dashboard")
|
1379
1380
|
|
1381
|
+
if install_kube_state_metrics:
|
1382
|
+
console.print("📈 Will install kube-state-metrics for cluster metrics")
|
1383
|
+
|
1380
1384
|
# Show what will be deployed
|
1381
1385
|
console.print("\n📋 Components to deploy:")
|
1382
1386
|
console.print(" • Prometheus server with cluster monitoring configuration")
|
1383
1387
|
console.print(" • Grafana with admin credentials (admin/admin123)")
|
1384
1388
|
console.print(" • ServiceAccount and RBAC for Prometheus")
|
1385
1389
|
console.print(" • ConfigMaps for Prometheus configuration")
|
1390
|
+
if install_kube_state_metrics:
|
1391
|
+
console.print(" • kube-state-metrics for cluster metrics (via Helm or manual)")
|
1386
1392
|
if import_dashboard:
|
1387
1393
|
console.print(" • Default Kubernetes metrics dashboard")
|
1388
1394
|
|
@@ -1394,12 +1400,21 @@ def setup_monitoring(
|
|
1394
1400
|
namespace=namespace,
|
1395
1401
|
grafana_service_type=grafana_service_type,
|
1396
1402
|
import_dashboard=import_dashboard,
|
1397
|
-
wait_for_ready=wait
|
1403
|
+
wait_for_ready=wait,
|
1404
|
+
install_kube_state_metrics=install_kube_state_metrics
|
1398
1405
|
)
|
1399
1406
|
|
1400
1407
|
if result['success']:
|
1401
1408
|
console.print("✅ Monitoring stack deployed successfully!")
|
1402
1409
|
|
1410
|
+
# Show warning if Grafana configuration failed
|
1411
|
+
if result.get('warning'):
|
1412
|
+
console.print(f"⚠️ {result['warning']}")
|
1413
|
+
console.print("💡 You can manually configure Grafana by:")
|
1414
|
+
console.print(" 1. Accessing Grafana with admin/admin123")
|
1415
|
+
console.print(" 2. Adding Prometheus as a data source")
|
1416
|
+
console.print(f" 3. Use URL: http://prometheus-service.{result['namespace']}.svc.cluster.local:9090")
|
1417
|
+
|
1403
1418
|
# Show deployment summary
|
1404
1419
|
console.print(f"\n📋 Deployment Summary:")
|
1405
1420
|
console.print(f"📍 Namespace: {result['namespace']}")
|
@@ -1412,9 +1427,31 @@ def setup_monitoring(
|
|
1412
1427
|
if result['grafana']['deployed']:
|
1413
1428
|
console.print("✅ Grafana: Deployed")
|
1414
1429
|
console.print(f"🔑 Admin credentials: {result['grafana']['admin_user']}/{result['grafana']['admin_password']}")
|
1430
|
+
|
1431
|
+
# Show Grafana configuration status
|
1432
|
+
if result.get('grafana_configured', True):
|
1433
|
+
console.print("✅ Grafana: Automatically configured with Prometheus")
|
1434
|
+
else:
|
1435
|
+
console.print("⚠️ Grafana: Manual configuration required")
|
1415
1436
|
else:
|
1416
1437
|
console.print("❌ Grafana: Failed to deploy")
|
1417
1438
|
|
1439
|
+
# Show kube-state-metrics status
|
1440
|
+
if install_kube_state_metrics and 'kube_state_metrics' in result:
|
1441
|
+
ksm = result['kube_state_metrics']
|
1442
|
+
if ksm.get('installed'):
|
1443
|
+
method = ksm.get('method', 'unknown')
|
1444
|
+
if method == 'helm':
|
1445
|
+
console.print("✅ kube-state-metrics: Deployed via Helm")
|
1446
|
+
elif method == 'manual':
|
1447
|
+
console.print("✅ kube-state-metrics: Deployed via manual YAML")
|
1448
|
+
elif method == 'existing':
|
1449
|
+
console.print("✅ kube-state-metrics: Already deployed")
|
1450
|
+
else:
|
1451
|
+
console.print(f"❌ kube-state-metrics: Failed to deploy")
|
1452
|
+
if ksm.get('error'):
|
1453
|
+
console.print(f" Error: {ksm['error']}")
|
1454
|
+
|
1418
1455
|
if show_info:
|
1419
1456
|
# Get and display monitoring information
|
1420
1457
|
with console.status("Retrieving monitoring stack information..."):
|
k8s_helper/core.py
CHANGED
@@ -1683,10 +1683,346 @@ class K8sClient:
|
|
1683
1683
|
# ======================
|
1684
1684
|
# MONITORING OPERATIONS
|
1685
1685
|
# ======================
|
1686
|
+
|
1687
|
+
def _check_helm_available(self) -> bool:
    """Report whether a working ``helm`` binary can be invoked.

    Runs ``helm version --short`` with a 10 second timeout and treats a zero
    exit status as "available".

    Returns:
        True when the command ran and exited with status 0; False when it
        exited non-zero, timed out, or could not be started at all.
    """
    import subprocess

    try:
        probe = subprocess.run(
            ['helm', 'version', '--short'],
            capture_output=True, text=True, timeout=10,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers FileNotFoundError (no helm on PATH) and also
        # PermissionError and other launch failures, so this probe never
        # raises just because the binary cannot be executed.
        return False
    return probe.returncode == 0
|
1696
|
+
|
1697
|
+
def _install_kube_state_metrics(self, namespace: str) -> Dict[str, Any]:
    """Install kube-state-metrics, preferring Helm and falling back to the API.

    The method works in three stages:

    1. List Deployments in every namespace; if any Deployment name contains
       ``kube-state-metrics`` it is reused and nothing is installed.
    2. If ``self._check_helm_available()`` is true, add/update the
       ``prometheus-community`` chart repo and ``helm install`` the chart.
    3. If Helm is unavailable or fails, create a ServiceAccount, ClusterRole,
       ClusterRoleBinding, Deployment and Service through the Kubernetes
       client. An HTTP 409 from any of these is treated as "already exists".

    Args:
        namespace: Namespace to install kube-state-metrics into.

    Returns:
        Dict with keys:
          ``installed`` (bool) - whether kube-state-metrics is present now;
          ``method`` ('existing', 'helm', 'manual' or None);
          ``error`` (str or None) - message from a failed manual install;
          ``namespace`` (str or None) - namespace kube-state-metrics runs in,
          which can differ from ``namespace`` when an existing one is reused.
    """
    import subprocess

    ksm_name = "kube-state-metrics"
    ksm_labels = {"app": ksm_name}

    result = {
        'installed': False,
        'method': None,
        'error': None,
        'namespace': None,
    }

    def _create_if_absent(create_call, kind_label):
        """Run create_call(); an HTTP 409 (already exists) is not an error."""
        try:
            create_call()
        except ApiException as exc:
            if exc.status != 409:
                raise
            print(f"⚠️ {kind_label} already exists")
        else:
            print(f"✅ Created {kind_label} for kube-state-metrics")

    # Check if kube-state-metrics is already running anywhere in the cluster
    try:
        deployments = self.apps_v1.list_deployment_for_all_namespaces()
        for deployment in deployments.items:
            if ksm_name in deployment.metadata.name:
                found_namespace = deployment.metadata.namespace
                print(f"✅ kube-state-metrics already deployed in namespace: {found_namespace}")
                if found_namespace != namespace:
                    # Surface the mismatch: anything addressing the service
                    # by the requested namespace will not reach this one.
                    print(f"⚠️ kube-state-metrics runs in '{found_namespace}', "
                          f"not the requested namespace '{namespace}'")
                result['installed'] = True
                result['method'] = 'existing'
                result['namespace'] = found_namespace
                return result
    except Exception as e:
        # Best effort only: a failed lookup must not block the installation.
        print(f"⚠️ Warning: Could not check existing deployments: {e}")

    # Try Helm installation first
    if self._check_helm_available():
        try:
            print("📦 Installing kube-state-metrics using Helm...")

            # Add prometheus-community repo if not exists
            subprocess.run(['helm', 'repo', 'add', 'prometheus-community',
                            'https://prometheus-community.github.io/helm-charts'],
                           capture_output=True, text=True, timeout=30)

            # Update repo
            subprocess.run(['helm', 'repo', 'update'],
                           capture_output=True, text=True, timeout=30)

            # Install kube-state-metrics
            helm_cmd = [
                'helm', 'install', ksm_name,
                'prometheus-community/kube-state-metrics',
                '--namespace', namespace,
                '--create-namespace',
                '--set', 'service.port=8080',
                '--set', 'service.targetPort=8080'
            ]

            helm_result = subprocess.run(helm_cmd, capture_output=True, text=True, timeout=120)

            if helm_result.returncode == 0:
                print("✅ kube-state-metrics installed successfully via Helm")
                result['installed'] = True
                result['method'] = 'helm'
                result['namespace'] = namespace
                return result
            # Non-zero exit: report it and fall through to the manual path.
            print(f"⚠️ Helm installation failed: {helm_result.stderr}")

        except subprocess.TimeoutExpired:
            print("⚠️ Helm installation timed out, falling back to manual installation")
        except Exception as e:
            print(f"⚠️ Helm installation failed: {e}, falling back to manual installation")

    # Fallback to manual installation through the Kubernetes API
    try:
        print("📦 Installing kube-state-metrics using manual YAML...")

        # ServiceAccount the kube-state-metrics pod runs as
        service_account = client.V1ServiceAccount(
            metadata=client.V1ObjectMeta(name=ksm_name, namespace=namespace)
        )
        _create_if_absent(
            lambda: self.core_v1.create_namespaced_service_account(
                namespace=namespace, body=service_account),
            "ServiceAccount",
        )

        # Build the ClusterRole: (api group, resources, verbs) per rule
        read_verbs = ["list", "watch"]
        rule_table = [
            ("", ["configmaps", "secrets", "nodes", "pods", "services",
                  "resourcequotas", "replicationcontrollers", "limitranges",
                  "persistentvolumeclaims", "persistentvolumes", "namespaces", "endpoints"],
             read_verbs),
            ("apps", ["statefulsets", "daemonsets", "deployments", "replicasets"], read_verbs),
            ("batch", ["cronjobs", "jobs"], read_verbs),
            ("autoscaling", ["horizontalpodautoscalers"], read_verbs),
            ("authentication.k8s.io", ["tokenreviews"], ["create"]),
            ("authorization.k8s.io", ["subjectaccessreviews"], ["create"]),
            ("policy", ["poddisruptionbudgets"], read_verbs),
            ("certificates.k8s.io", ["certificatesigningrequests"], read_verbs),
            ("storage.k8s.io", ["storageclasses", "volumeattachments"], read_verbs),
            ("admissionregistration.k8s.io",
             ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"], read_verbs),
            ("networking.k8s.io", ["networkpolicies", "ingresses"], read_verbs),
            ("coordination.k8s.io", ["leases"], read_verbs),
        ]
        cluster_role = client.V1ClusterRole(
            metadata=client.V1ObjectMeta(name=ksm_name),
            rules=[
                client.V1PolicyRule(
                    api_groups=[api_group],
                    resources=list(resources),
                    verbs=list(verbs),
                )
                for api_group, resources, verbs in rule_table
            ],
        )

        # Submit the ClusterRole to the cluster
        rbac_v1 = client.RbacAuthorizationV1Api()
        _create_if_absent(
            lambda: rbac_v1.create_cluster_role(body=cluster_role),
            "ClusterRole",
        )

        # Build the ClusterRoleBinding subject with client-version compatibility:
        # the model class is V1Subject in some client releases and
        # RbacV1Subject in others; fall back to a plain dict if neither exists.
        subject_cls = getattr(client, 'V1Subject', None) or getattr(client, 'RbacV1Subject', None)
        if subject_cls is not None:
            subject = subject_cls(
                kind="ServiceAccount",
                name=ksm_name,
                namespace=namespace
            )
        else:
            subject = {
                'kind': 'ServiceAccount',
                'name': ksm_name,
                'namespace': namespace
            }

        cluster_role_binding = client.V1ClusterRoleBinding(
            metadata=client.V1ObjectMeta(name=ksm_name),
            subjects=[subject],
            role_ref=client.V1RoleRef(
                kind="ClusterRole",
                name=ksm_name,
                api_group="rbac.authorization.k8s.io"
            )
        )
        _create_if_absent(
            lambda: rbac_v1.create_cluster_role_binding(body=cluster_role_binding),
            "ClusterRoleBinding",
        )

        # Deployment: one replica exposing metrics on 8080 and telemetry on 8081
        container = client.V1Container(
            name=ksm_name,
            image="registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1",
            ports=[
                client.V1ContainerPort(name="http-metrics", container_port=8080, protocol="TCP"),
                client.V1ContainerPort(name="telemetry", container_port=8081, protocol="TCP"),
            ],
            liveness_probe=client.V1Probe(
                http_get=client.V1HTTPGetAction(path="/healthz", port=8080),
                initial_delay_seconds=5,
                timeout_seconds=5
            ),
            readiness_probe=client.V1Probe(
                http_get=client.V1HTTPGetAction(path="/", port=8081),
                initial_delay_seconds=5,
                timeout_seconds=5
            ),
            security_context=client.V1SecurityContext(
                allow_privilege_escalation=False,
                read_only_root_filesystem=True,
                run_as_non_root=True,
                run_as_user=65534
            )
        )
        deployment = client.V1Deployment(
            metadata=client.V1ObjectMeta(
                name=ksm_name,
                namespace=namespace,
                labels=dict(ksm_labels)
            ),
            spec=client.V1DeploymentSpec(
                replicas=1,
                selector=client.V1LabelSelector(match_labels=dict(ksm_labels)),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(labels=dict(ksm_labels)),
                    spec=client.V1PodSpec(
                        service_account_name=ksm_name,
                        containers=[container]
                    )
                )
            )
        )
        _create_if_absent(
            lambda: self.apps_v1.create_namespaced_deployment(
                namespace=namespace, body=deployment),
            "Deployment",
        )

        # ClusterIP Service in front of the Deployment's two ports
        service = client.V1Service(
            metadata=client.V1ObjectMeta(
                name=ksm_name,
                namespace=namespace,
                labels=dict(ksm_labels)
            ),
            spec=client.V1ServiceSpec(
                selector=dict(ksm_labels),
                ports=[
                    client.V1ServicePort(name="http-metrics", port=8080,
                                         target_port=8080, protocol="TCP"),
                    client.V1ServicePort(name="telemetry", port=8081,
                                         target_port=8081, protocol="TCP"),
                ],
                type="ClusterIP"
            )
        )
        _create_if_absent(
            lambda: self.core_v1.create_namespaced_service(
                namespace=namespace, body=service),
            "Service",
        )

        result['installed'] = True
        result['method'] = 'manual'
        result['namespace'] = namespace
        print("✅ kube-state-metrics installed successfully via manual YAML")

    except Exception as e:
        # Any non-409 API error (or client failure) ends up here; the caller
        # reads result['error'] instead of handling an exception.
        result['error'] = str(e)
        print(f"❌ Failed to install kube-state-metrics: {e}")

    return result
|
2020
|
+
|
1686
2021
|
def setup_monitoring(self, namespace: str = "monitoring",
|
1687
2022
|
grafana_service_type: str = "NodePort",
|
1688
2023
|
import_dashboard: bool = True,
|
1689
|
-
wait_for_ready: bool = True
|
2024
|
+
wait_for_ready: bool = True,
|
2025
|
+
install_kube_state_metrics: bool = True) -> Dict[str, Any]:
|
1690
2026
|
"""Setup complete monitoring stack with Prometheus and Grafana
|
1691
2027
|
|
1692
2028
|
Args:
|
@@ -1694,6 +2030,7 @@ class K8sClient:
|
|
1694
2030
|
grafana_service_type: Service type for Grafana (NodePort, LoadBalancer, ClusterIP)
|
1695
2031
|
import_dashboard: Whether to import default Kubernetes dashboard
|
1696
2032
|
wait_for_ready: Whether to wait for deployments to be ready
|
2033
|
+
install_kube_state_metrics: Whether to install kube-state-metrics for cluster metrics
|
1697
2034
|
|
1698
2035
|
Returns:
|
1699
2036
|
Dictionary with deployment info, URLs, and credentials
|
@@ -1702,6 +2039,7 @@ class K8sClient:
|
|
1702
2039
|
'namespace': namespace,
|
1703
2040
|
'prometheus': {},
|
1704
2041
|
'grafana': {},
|
2042
|
+
'kube_state_metrics': {},
|
1705
2043
|
'success': False,
|
1706
2044
|
'error': None
|
1707
2045
|
}
|
@@ -1710,6 +2048,12 @@ class K8sClient:
|
|
1710
2048
|
# Create monitoring namespace
|
1711
2049
|
self._create_monitoring_namespace(namespace)
|
1712
2050
|
|
2051
|
+
# Install kube-state-metrics if requested
|
2052
|
+
if install_kube_state_metrics:
|
2053
|
+
print("📊 Installing kube-state-metrics for cluster metrics...")
|
2054
|
+
ksm_result = self._install_kube_state_metrics(namespace)
|
2055
|
+
result['kube_state_metrics'] = ksm_result
|
2056
|
+
|
1713
2057
|
# Deploy Prometheus
|
1714
2058
|
prometheus_result = self._deploy_prometheus(namespace)
|
1715
2059
|
result['prometheus'] = prometheus_result
|
@@ -1724,8 +2068,11 @@ class K8sClient:
|
|
1724
2068
|
# Configure Grafana data source and dashboard
|
1725
2069
|
if self._configure_grafana(namespace, import_dashboard):
|
1726
2070
|
result['success'] = True
|
2071
|
+
result['grafana_configured'] = True
|
1727
2072
|
else:
|
1728
|
-
result['
|
2073
|
+
result['success'] = True # Still successful even if config fails
|
2074
|
+
result['grafana_configured'] = False
|
2075
|
+
result['warning'] = "Grafana deployed but automatic configuration failed"
|
1729
2076
|
else:
|
1730
2077
|
result['error'] = "Monitoring deployments failed to become ready"
|
1731
2078
|
else:
|
@@ -1759,7 +2106,7 @@ class K8sClient:
|
|
1759
2106
|
"""Deploy Prometheus to the cluster"""
|
1760
2107
|
result = {'deployed': False, 'service_name': 'prometheus-service'}
|
1761
2108
|
|
1762
|
-
# Prometheus ConfigMap
|
2109
|
+
# Prometheus ConfigMap with kube-state-metrics support
|
1763
2110
|
prometheus_config = """
|
1764
2111
|
global:
|
1765
2112
|
scrape_interval: 15s
|
@@ -1787,6 +2134,29 @@ scrape_configs:
|
|
1787
2134
|
- action: labelmap
|
1788
2135
|
regex: __meta_kubernetes_node_label_(.+)
|
1789
2136
|
|
2137
|
+
- job_name: 'kubernetes-cadvisor'
|
2138
|
+
kubernetes_sd_configs:
|
2139
|
+
- role: node
|
2140
|
+
scheme: https
|
2141
|
+
tls_config:
|
2142
|
+
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
2143
|
+
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
2144
|
+
relabel_configs:
|
2145
|
+
- action: labelmap
|
2146
|
+
regex: __meta_kubernetes_node_label_(.+)
|
2147
|
+
- target_label: __address__
|
2148
|
+
replacement: kubernetes.default.svc:443
|
2149
|
+
- source_labels: [__meta_kubernetes_node_name]
|
2150
|
+
regex: (.+)
|
2151
|
+
target_label: __metrics_path__
|
2152
|
+
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
2153
|
+
|
2154
|
+
- job_name: 'kube-state-metrics'
|
2155
|
+
static_configs:
|
2156
|
+
- targets: ['kube-state-metrics.{}:8080']
|
2157
|
+
metrics_path: /metrics
|
2158
|
+
scrape_interval: 30s
|
2159
|
+
|
1790
2160
|
- job_name: 'kubernetes-pods'
|
1791
2161
|
kubernetes_sd_configs:
|
1792
2162
|
- role: pod
|
@@ -1811,7 +2181,36 @@ scrape_configs:
|
|
1811
2181
|
- source_labels: [__meta_kubernetes_pod_name]
|
1812
2182
|
action: replace
|
1813
2183
|
target_label: kubernetes_pod_name
|
1814
|
-
|
2184
|
+
|
2185
|
+
- job_name: 'kubernetes-service-endpoints'
|
2186
|
+
kubernetes_sd_configs:
|
2187
|
+
- role: endpoints
|
2188
|
+
relabel_configs:
|
2189
|
+
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
2190
|
+
action: keep
|
2191
|
+
regex: true
|
2192
|
+
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
2193
|
+
action: replace
|
2194
|
+
target_label: __scheme__
|
2195
|
+
regex: (https?)
|
2196
|
+
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
2197
|
+
action: replace
|
2198
|
+
target_label: __metrics_path__
|
2199
|
+
regex: (.+)
|
2200
|
+
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
2201
|
+
action: replace
|
2202
|
+
target_label: __address__
|
2203
|
+
regex: ([^:]+)(?::\\d+)?;(\\d+)
|
2204
|
+
replacement: $1:$2
|
2205
|
+
- action: labelmap
|
2206
|
+
regex: __meta_kubernetes_service_label_(.+)
|
2207
|
+
- source_labels: [__meta_kubernetes_namespace]
|
2208
|
+
action: replace
|
2209
|
+
target_label: kubernetes_namespace
|
2210
|
+
- source_labels: [__meta_kubernetes_service_name]
|
2211
|
+
action: replace
|
2212
|
+
target_label: kubernetes_name
|
2213
|
+
""".format(namespace)
|
1815
2214
|
|
1816
2215
|
# Create ConfigMap
|
1817
2216
|
configmap = client.V1ConfigMap(
|
@@ -2124,9 +2523,6 @@ scrape_configs:
|
|
2124
2523
|
def _configure_grafana(self, namespace: str, import_dashboard: bool = True) -> bool:
|
2125
2524
|
"""Configure Grafana with Prometheus data source and dashboard"""
|
2126
2525
|
try:
|
2127
|
-
# Wait a bit for Grafana to fully start
|
2128
|
-
time.sleep(10)
|
2129
|
-
|
2130
2526
|
import requests
|
2131
2527
|
import json
|
2132
2528
|
|
@@ -2138,6 +2534,32 @@ scrape_configs:
|
|
2138
2534
|
|
2139
2535
|
print(f"🔧 Configuring Grafana at {grafana_url}")
|
2140
2536
|
|
2537
|
+
# Wait for Grafana to be accessible with retry logic
|
2538
|
+
max_retries = 12 # 2 minutes total (12 * 10 seconds)
|
2539
|
+
retry_count = 0
|
2540
|
+
auth = ('admin', 'admin123')
|
2541
|
+
|
2542
|
+
while retry_count < max_retries:
|
2543
|
+
try:
|
2544
|
+
# Test connection to Grafana
|
2545
|
+
health_response = requests.get(
|
2546
|
+
f"{grafana_url}/api/health",
|
2547
|
+
auth=auth,
|
2548
|
+
timeout=10
|
2549
|
+
)
|
2550
|
+
if health_response.status_code == 200:
|
2551
|
+
print("✅ Grafana is accessible")
|
2552
|
+
break
|
2553
|
+
except Exception as e:
|
2554
|
+
retry_count += 1
|
2555
|
+
if retry_count < max_retries:
|
2556
|
+
print(f"⏳ Waiting for Grafana to be accessible... (attempt {retry_count}/{max_retries})")
|
2557
|
+
time.sleep(10)
|
2558
|
+
else:
|
2559
|
+
print(f"❌ Grafana not accessible after {max_retries} attempts")
|
2560
|
+
print("💡 You can manually add Prometheus as a data source in Grafana")
|
2561
|
+
return False
|
2562
|
+
|
2141
2563
|
# Add Prometheus data source
|
2142
2564
|
datasource_payload = {
|
2143
2565
|
"name": "Prometheus",
|
@@ -2147,20 +2569,27 @@ scrape_configs:
|
|
2147
2569
|
"isDefault": True
|
2148
2570
|
}
|
2149
2571
|
|
2150
|
-
|
2151
|
-
|
2152
|
-
|
2153
|
-
|
2154
|
-
|
2155
|
-
|
2156
|
-
|
2157
|
-
|
2158
|
-
|
2159
|
-
|
2160
|
-
|
2161
|
-
|
2162
|
-
|
2163
|
-
|
2572
|
+
# Try to add data source with retry
|
2573
|
+
for attempt in range(3):
|
2574
|
+
try:
|
2575
|
+
response = requests.post(
|
2576
|
+
f"{grafana_url}/api/datasources",
|
2577
|
+
json=datasource_payload,
|
2578
|
+
auth=auth,
|
2579
|
+
timeout=30
|
2580
|
+
)
|
2581
|
+
|
2582
|
+
if response.status_code in [200, 409]: # Success or already exists
|
2583
|
+
print("✅ Prometheus data source configured")
|
2584
|
+
break
|
2585
|
+
else:
|
2586
|
+
print(f"⚠️ Attempt {attempt + 1}: Could not add Prometheus data source: {response.text}")
|
2587
|
+
if attempt < 2:
|
2588
|
+
time.sleep(5)
|
2589
|
+
except Exception as e:
|
2590
|
+
print(f"⚠️ Attempt {attempt + 1}: Error adding data source: {e}")
|
2591
|
+
if attempt < 2:
|
2592
|
+
time.sleep(5)
|
2164
2593
|
|
2165
2594
|
# Import default dashboard if requested
|
2166
2595
|
if import_dashboard:
|
@@ -0,0 +1,11 @@
|
|
1
|
+
k8s_helper/__init__.py,sha256=wtG9p8ZbOO4WDPZxf3acQ_pNu4x92MJBnwI9l8G7LCc,2666
|
2
|
+
k8s_helper/cli.py,sha256=dBqPBfSNoh3NAFq5qiyERD0uV6Entf-rEwpuBhuYHGk,81655
|
3
|
+
k8s_helper/config.py,sha256=P7YdfyvCHprrNs2J9DRb3RrClylfTTh5hfTtDzLug0A,6867
|
4
|
+
k8s_helper/core.py,sha256=rf4dwcrV9fTyRZrWTucslOHJDTKhlnMXKqV_OWTIYQ8,130172
|
5
|
+
k8s_helper/utils.py,sha256=wYgTd5ktyuI-EiVcfW7FrxA7MzXY5odrEKQgmMVdueY,9496
|
6
|
+
k8s_helper_cli-0.5.0.dist-info/licenses/LICENSE,sha256=tXPvVl3gLVc6e0qCEoLH9KjeA7z4JVL78UybpvGtBCw,1096
|
7
|
+
k8s_helper_cli-0.5.0.dist-info/METADATA,sha256=dHNcGQ7c-7KH9Z98yj4DFcXphLjDtQtoAt0cYaxEsSc,30789
|
8
|
+
k8s_helper_cli-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
9
|
+
k8s_helper_cli-0.5.0.dist-info/entry_points.txt,sha256=IoCMWUZ6mn90LwzQzEy5YkWOwvogDdZ6ycqUWAzCFTQ,50
|
10
|
+
k8s_helper_cli-0.5.0.dist-info/top_level.txt,sha256=x9A1jflyer-z2cFnkqk5B42juoH2q0fy5hkT9upsTG8,11
|
11
|
+
k8s_helper_cli-0.5.0.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
k8s_helper/__init__.py,sha256=YuM-ES4NKCK-M1VusIhUSIsO8vigHDWGPyAS6bvShgw,2666
|
2
|
-
k8s_helper/cli.py,sha256=ngXnZ-6EaTm9hCViLPOLZya4HtFDsqpPbLYnOBEfQmA,79400
|
3
|
-
k8s_helper/config.py,sha256=P7YdfyvCHprrNs2J9DRb3RrClylfTTh5hfTtDzLug0A,6867
|
4
|
-
k8s_helper/core.py,sha256=HCsDx8xApBvgDooR3XqaW2ZeIzQU35oGm6rfjTBzYVc,110654
|
5
|
-
k8s_helper/utils.py,sha256=wYgTd5ktyuI-EiVcfW7FrxA7MzXY5odrEKQgmMVdueY,9496
|
6
|
-
k8s_helper_cli-0.4.2.dist-info/licenses/LICENSE,sha256=tXPvVl3gLVc6e0qCEoLH9KjeA7z4JVL78UybpvGtBCw,1096
|
7
|
-
k8s_helper_cli-0.4.2.dist-info/METADATA,sha256=p6tE1EFVcCIGx5vwv-L-EDB-K5VRbOtAXcc5z2K5dXM,30789
|
8
|
-
k8s_helper_cli-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
9
|
-
k8s_helper_cli-0.4.2.dist-info/entry_points.txt,sha256=IoCMWUZ6mn90LwzQzEy5YkWOwvogDdZ6ycqUWAzCFTQ,50
|
10
|
-
k8s_helper_cli-0.4.2.dist-info/top_level.txt,sha256=x9A1jflyer-z2cFnkqk5B42juoH2q0fy5hkT9upsTG8,11
|
11
|
-
k8s_helper_cli-0.4.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|