konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,191 @@
1
+ apiVersion: apps/v1
2
+ kind: Deployment
3
+ metadata:
4
+ labels:
5
+ {% if not general %}
6
+ {{ model_name_label }}: {{ name }}
7
+ model.aibrix.ai/port: "{{ ports }}"
8
+ {% endif %}
9
+ {{ deployment_name_label }}: "{{ name }}"
10
+ {{ deployment_user_label }}: "{{ user }}"
11
+ {{ deployment_accelerator_label }}: "{{ accelerator_type }}"
12
+ {{ deployment_num_accelerators_label }}: "{{ num_accelerators }}"
13
+ trainy.ai/has-autoscaler: "{{ autoscaler }}"
14
+ trainy.ai/konduktor-managed: "true"
15
+ {% if autoscaler == 'true' %}
16
+ trainy.ai/original-min-replicas: "{{ min_replicas }}"
17
+ trainy.ai/original-max-replicas: "{{ max_replicas }}"
18
+ {% endif %}
19
+ name: {{ name }}
20
+ namespace: default
21
+ spec:
22
+ replicas: {{ min_replicas }}
23
+ selector:
24
+ matchLabels:
25
+ {% if not general %}
26
+ {{ model_name_label }}: {{ name }}
27
+ {% endif %}
28
+ {{ deployment_name_label }}: "{{ name }}"
29
+ template: {}
30
+
31
+ ---
32
+
33
+ apiVersion: v1
34
+ kind: Service
35
+ metadata:
36
+ labels:
37
+ {% if not general %}
38
+ {{ model_name_label }}: {{ name }}
39
+ {% endif %}
40
+ prometheus-discovery: "true"
41
+ {{ deployment_name_label }}: "{{ name }}"
42
+ {{ deployment_user_label }}: "{{ user }}"
43
+ trainy.ai/has-autoscaler: "{{ autoscaler }}"
44
+ annotations:
45
+ prometheus.io/scrape: "true"
46
+ prometheus.io/port: "9000"
47
+ name: {{ name }}
48
+ namespace: default
49
+ spec:
50
+ ports:
51
+ - name: serve
52
+ port: {{ ports }}
53
+ protocol: TCP
54
+ targetPort: {{ ports }}
55
+ {% if not general %}
56
+ - name: http
57
+ port: 8080
58
+ protocol: TCP
59
+ targetPort: 8080
60
+ {% endif %}
61
+ selector:
62
+ {% if not general %}
63
+ {{ model_name_label }}: {{ name }}
64
+ {% endif %}
65
+ {{ deployment_name_label }}: "{{ name }}"
66
+ type: ClusterIP
67
+
68
+ # AIBRIX PODAUTOSCALER STUFF (KPA)
69
+ {% if not general and autoscaler == 'true' %}
70
+ ---
71
+ apiVersion: autoscaling.aibrix.ai/v1alpha1
72
+ kind: PodAutoscaler
73
+ metadata:
74
+ name: {{ name }}-pa
75
+ namespace: default
76
+ labels:
77
+ {{ model_name_label }}: {{ name }}
78
+ app.kubernetes.io/name: aibrix
79
+ app.kubernetes.io/managed-by: kustomize
80
+ {{ deployment_name_label }}: "{{ name }}"
81
+ {{ deployment_user_label }}: "{{ user }}"
82
+ spec:
83
+ scalingStrategy: KPA
84
+ minReplicas: {{ min_replicas }}
85
+ maxReplicas: {{ max_replicas }}
86
+ metricsSources:
87
+ - metricSourceType: domain
88
+ protocolType: http
89
+ endpoint: aibrix-activator.aibrix-activator.svc.cluster.local:8080
90
+ path: /metrics/default/{{ name }}
91
+ targetMetric: vllm:deployment_replicas
92
+ targetValue: "1"
93
+ scaleTargetRef:
94
+ apiVersion: apps/v1
95
+ kind: Deployment
96
+ name: {{ name }}
97
+ {% endif %}
98
+
99
+ # KEDA HTTP ADD-ON STUFF (1 per deployment)
100
+ {% if general %}
101
+ {% if autoscaler == 'true' %}
102
+ # HTTPScaledObject (1 per deployment) - only when autoscaling enabled
103
+ ---
104
+ apiVersion: http.keda.sh/v1alpha1
105
+ kind: HTTPScaledObject
106
+ metadata:
107
+ name: {{ name }}-httpscaledobject
108
+ namespace: default
109
+ labels:
110
+ {{ deployment_name_label }}: "{{ name }}"
111
+ {{ deployment_user_label }}: "{{ user }}"
112
+ spec:
113
+ hosts:
114
+ - {{ name }}
115
+ pathPrefixes:
116
+ - "/"
117
+ {% if probe_path %}
118
+ - "{{ probe_path }}"
119
+ {% endif %}
120
+ scaleTargetRef:
121
+ name: "{{ name }}"
122
+ kind: Deployment
123
+ apiVersion: apps/v1
124
+ service: "{{ name }}"
125
+ port: {{ ports }}
126
+ replicas:
127
+ min: {{ min_replicas }}
128
+ max: {{ max_replicas }}
129
+ scaledownPeriod: 1200 # 20 minutes
130
+ scalingMetric:
131
+ requestRate:
132
+ targetValue: 4
133
+ granularity: "1s"
134
+ window: "30s"
135
+ {% endif %}
136
+
137
+ # INGRESS (1 per deployment)
138
+ ---
139
+ apiVersion: networking.k8s.io/v1
140
+ kind: Ingress
141
+ metadata:
142
+ name: {{ name }}-ingress
143
+ labels:
144
+ {{ deployment_name_label }}: "{{ name }}"
145
+ {{ deployment_user_label }}: "{{ user }}"
146
+ trainy.ai/konduktor-managed: "true"
147
+ annotations:
148
+ nginx.ingress.kubernetes.io/use-regex: "true"
149
+ nginx.ingress.kubernetes.io/rewrite-target: /$1
150
+ {% if autoscaler == 'true' %}
151
+ nginx.ingress.kubernetes.io/upstream-vhost: "{{ name }}"
152
+ {% endif %}
153
+ spec:
154
+ ingressClassName: nginx
155
+ rules:
156
+ - host: {{ general_base_host }}
157
+ http:
158
+ paths:
159
+ - path: /{{ name }}(.*)
160
+ pathType: ImplementationSpecific
161
+ backend:
162
+ service:
163
+ {% if autoscaler == 'true' %}
164
+ # Use KEDA interceptor for autoscaling
165
+ name: keda-proxy
166
+ port:
167
+ number: 8080
168
+ {% else %}
169
+ # Direct to app service for fixed replicas
170
+ name: {{ name }}
171
+ port:
172
+ number: {{ ports }}
173
+ {% endif %}
174
+ # Direct access convenience rule (via LB IP + Host: {{ name }})
175
+ - host: {{ name }}
176
+ http:
177
+ paths:
178
+ - path: /(.*)
179
+ pathType: ImplementationSpecific
180
+ backend:
181
+ service:
182
+ {% if autoscaler == 'true' %}
183
+ name: keda-proxy
184
+ port:
185
+ number: 8080
186
+ {% else %}
187
+ name: {{ name }}
188
+ port:
189
+ number: {{ ports }}
190
+ {% endif %}
191
+ {% endif %}
@@ -0,0 +1,43 @@
1
+ jobset:
2
+ apiVersion: jobset.x-k8s.io/v1alpha2
3
+ kind: JobSet
4
+ metadata:
5
+ name: {{ job_name }}
6
+ labels:
7
+ {{ jobset_name_label }}: "{{ job_name }}"
8
+ {{ jobset_userid_label }}: "{{ user_id }}"
9
+ {{ jobset_user_label }}: "{{ user }}"
10
+ {% if accelerator_type %}
11
+ {{ jobset_accelerator_label }}: "{{ accelerator_type }}"
12
+ {{ jobset_num_accelerators_label }}: "{{ num_accelerators }}"
13
+ {% endif %}
14
+ {% if max_execution_time %}
15
+ {{ jobset_max_execution_time_label }}: "{{ max_execution_time }}"
16
+ {% endif %}
17
+ trainy.ai/konduktor-managed: "true"
18
+ parent: "trainy"
19
+ annotations: {}
20
+ spec:
21
+ ttlSecondsAfterFinished: 31536000 # 1 year (365 days)
22
+ {% if max_restarts is not none %}
23
+ failurePolicy:
24
+ maxRestarts: {{ max_restarts }}
25
+ {% endif %}
26
+ replicatedJobs:
27
+ - name: workers
28
+ template:
29
+ spec:
30
+ ttlSecondsAfterFinished: 600 # 10 minutes
31
+ parallelism: {{ num_nodes }}
32
+ {% if completions %}
33
+ completions: {{ completions }}
34
+ {% else %}
35
+ completions: {{ num_nodes }}
36
+ {% endif %}
37
+ backoffLimit: 0
38
+ template: {}
39
+ podFailurePolicy:
40
+ rules:
41
+ - action: FailJob
42
+ onPodConditions:
43
+ - type: ConfigIssue