codeflowhub 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeflowhub/__init__.py +13 -0
- codeflowhub/action.py +5 -0
- codeflowhub/base.py +73 -0
- codeflowhub/flow.py +560 -0
- codeflowhub/model.py +19 -0
- codeflowhub/service/__init__.py +3 -0
- codeflowhub/service/airflow_exporter.py +607 -0
- codeflowhub/storage/__init__.py +11 -0
- codeflowhub/storage/local_storage.py +24 -0
- codeflowhub/storage/s3_storage.py +87 -0
- codeflowhub/storage/storage.py +16 -0
- codeflowhub/task.py +107 -0
- codeflowhub/template/__init__.py +4 -0
- codeflowhub/template/analyze_speaker_pkg/__init__.py +0 -0
- codeflowhub/template/analyze_speaker_pkg/main.py +155 -0
- codeflowhub/template/extract_voice_pkg/__init__.py +0 -0
- codeflowhub/template/extract_voice_pkg/main.py +119 -0
- codeflowhub/template/read_pdf_pkg/__init__.py +0 -0
- codeflowhub/template/read_pdf_pkg/main.py +161 -0
- codeflowhub/template/transcript_pkg/__init__.py +0 -0
- codeflowhub/template/transcript_pkg/main.py +143 -0
- codeflowhub-0.0.1.dist-info/METADATA +19 -0
- codeflowhub-0.0.1.dist-info/RECORD +25 -0
- codeflowhub-0.0.1.dist-info/WHEEL +5 -0
- codeflowhub-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,607 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import base64
|
|
3
|
+
import sys
|
|
4
|
+
import hashlib
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from typing import Optional
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
class AirflowExporter:
|
|
10
|
+
"""Airflow DAG로 export하는 서비스"""
|
|
11
|
+
|
|
12
|
+
def __init__(self, flow_decorator):
    """Collect the settings needed to export ``flow_decorator`` as an Airflow DAG.

    Args:
        flow_decorator: Flow object to export. Its ``flow_name``/``name``,
            ``func``, ``env``, ``current_env``, ``airflow_sidecar_image``,
            ``annotations``, ``service_account_name`` and ``volumes``
            attributes are read here.
    """
    self.flow = flow_decorator
    self.dag_id = (flow_decorator.flow_name or flow_decorator.name).replace('_', '-')

    # File name of the module in which the workflow function is defined.
    workflow_file_path = sys.modules[flow_decorator.func.__module__].__file__
    self.workflow_filename = os.path.basename(workflow_file_path)

    # Settings of the selected environment.
    self.env = flow_decorator.env
    self.current_env = flow_decorator.current_env

    # Kubernetes settings.
    self.k8s_image = self.env.get('K8S_IMAGE', 'python:3.11-slim')
    self.k8s_namespace = self.env.get('K8S_NAMESPACE', 'airflow')

    # XCom sidecar image: the flow-level value wins, env is the fallback.
    self.xcom_sidecar_image = flow_decorator.airflow_sidecar_image or self.env.get('XCOM_SIDECAR_IMAGE', None)

    # Paths of additional packages to ship (configurable through env).
    self.extra_packages = self.env.get('EXTRA_PACKAGES', [])

    # ConfigMap settings.
    self.configmap_name = f'{self.dag_id}-code'
    self.configmap_mount_path = '/flowhub/code'

    # Common operator keyword arguments. The dict is copied because the
    # flow-level overrides below are written into it; without the copy they
    # would leak into the dict stored under env['K8S_COMMON'].
    default_common = {
        'namespace': self.k8s_namespace,
        'cmds': ['/bin/sh', '-c'],
        'do_xcom_push': True,
        'startup_timeout_seconds': 3600,
    }
    self.common_config = dict(self.env.get('K8S_COMMON', default_common))

    # Custom CLI arguments stored on FlowDecorator (imported here, at call time).
    from ..flow import FlowDecorator
    self.custom_cli_args = FlowDecorator._custom_cli_args.copy()

    # Flow-level annotations, service account and volumes extend the common config.
    if flow_decorator.annotations:
        self.common_config['annotations'] = flow_decorator.annotations
    if flow_decorator.service_account_name:
        self.common_config['service_account_name'] = flow_decorator.service_account_name
    if flow_decorator.volumes:
        # Keep only the name/claim pair; it is rendered as k8s.V1Volume later.
        volumes_list = [
            {
                'name': vol.name,
                'persistent_volume_claim': vol.persistent_volume_claim
            }
            for vol in flow_decorator.volumes
            if hasattr(vol, 'name') and hasattr(vol, 'persistent_volume_claim')
        ]
        if volumes_list:
            self.common_config['volumes'] = volumes_list
|
|
66
|
+
|
|
67
|
+
def export(self,
           output_path: str = None,
           schedule_interval: Optional[str] = None,
           start_date: Optional[datetime] = None,
           default_args: Optional[dict] = None):
    """Write the flow out as an Airflow DAG Python file.

    Args:
        output_path: Destination file; defaults to ``dags/<dag_id>_dag.py``.
        schedule_interval: Value rendered into the DAG's ``schedule_interval``.
        start_date: Start date used in the default ``default_args``;
            defaults to 2024-01-01.
        default_args: DAG ``default_args``; a built-in set is used when omitted.

    Returns:
        The path of the written DAG file.
    """
    if output_path is None:
        output_path = f'dags/{self.dag_id}_dag.py'
    if start_date is None:
        start_date = datetime(2024, 1, 1)
    if default_args is None:
        default_args = dict(
            owner='flowhub',
            start_date=start_date,
            retries=1,
            retry_delay=timedelta(minutes=5),
        )

    # Make sure the target directory exists (nothing to do for a bare file name).
    target_dir = os.path.dirname(output_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # A ConfigMap manifest is only produced when the code is not fetched from a repo.
    configmap_path = None if self.flow.repo else self._generate_configmap_yaml(target_dir)

    dag_source = self._generate_dag_code(schedule_interval, start_date, default_args)

    # Write and force the content to disk before reporting success.
    with open(output_path, 'w', encoding='utf-8') as dag_file:
        dag_file.write(dag_source)
        dag_file.flush()
        os.fsync(dag_file.fileno())

    print(f'✅ Airflow DAG exported to: {output_path}')
    if configmap_path:
        print(f'✅ ConfigMap YAML exported to: {configmap_path}')
        print(f'📋 Apply ConfigMap: kubectl apply -f {configmap_path}')
    return output_path
|
|
111
|
+
|
|
112
|
+
def _generate_dag_code(self, schedule_interval, start_date, default_args):
|
|
113
|
+
"""DAG Python 코드 생성 (target.py 스타일)"""
|
|
114
|
+
|
|
115
|
+
flow_name = self.flow.flow_name or self.flow.name
|
|
116
|
+
header = f'''"""
|
|
117
|
+
Airflow DAG for Workflow: {flow_name}
|
|
118
|
+
Auto-generated from FloWhub on {datetime.now().isoformat()}
|
|
119
|
+
"""
|
|
120
|
+
from datetime import datetime, timedelta
|
|
121
|
+
from airflow import DAG
|
|
122
|
+
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
|
|
123
|
+
from kubernetes.client import models as k8s
|
|
124
|
+
'''
|
|
125
|
+
|
|
126
|
+
# XCom sidecar 설정
|
|
127
|
+
if self.xcom_sidecar_image:
|
|
128
|
+
header += f'''
|
|
129
|
+
from airflow.providers.cncf.kubernetes.utils.xcom_sidecar import PodDefaults
|
|
130
|
+
PodDefaults.SIDECAR_CONTAINER.image = '{self.xcom_sidecar_image}'
|
|
131
|
+
'''
|
|
132
|
+
|
|
133
|
+
# 공통 setup 스크립트
|
|
134
|
+
if self.flow.repo:
|
|
135
|
+
# repo가 있으면 git clone
|
|
136
|
+
setup_script = f"git clone {self.flow.repo} /app\n mkdir -p /app/input"
|
|
137
|
+
else:
|
|
138
|
+
# ConfigMap에서 코드 복사 (base64 → 마운트된 파일 사용)
|
|
139
|
+
setup_script = f"mkdir -p /app/input\n cp {self.configmap_mount_path}/{self.workflow_filename} /app/{self.workflow_filename}"
|
|
140
|
+
if self.extra_packages:
|
|
141
|
+
for pkg_path in self.extra_packages:
|
|
142
|
+
if os.path.exists(pkg_path):
|
|
143
|
+
pkg_name = os.path.basename(pkg_path)
|
|
144
|
+
pkg_tar_name = f'{pkg_name}.tar.gz'
|
|
145
|
+
setup_script += f"\n tar -xzf {self.configmap_mount_path}/{pkg_tar_name} -C /app"
|
|
146
|
+
else:
|
|
147
|
+
raise FileNotFoundError(
|
|
148
|
+
f"❌ EXTRA_PACKAGES에 지정된 패키지를 찾을 수 없습니다: {pkg_path}\n"
|
|
149
|
+
f" 현재 작업 디렉토리: {os.getcwd()}\n"
|
|
150
|
+
f" 절대 경로로 지정하거나, workflow 파일과 같은 위치에 패키지가 있는지 확인하세요."
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
header += f'''# Common setup script
|
|
154
|
+
SETUP_SCRIPT = """{setup_script}"""
|
|
155
|
+
|
|
156
|
+
'''
|
|
157
|
+
|
|
158
|
+
# DAG 정의
|
|
159
|
+
default_args_str = self._format_dict(default_args, base_indent=1)
|
|
160
|
+
description = self.flow.description or f'{flow_name} workflow'
|
|
161
|
+
tags_str = repr(self.flow.tags) if self.flow.tags else f"['{self.flow.namespace}']"
|
|
162
|
+
|
|
163
|
+
# Params 설정
|
|
164
|
+
params_line = ""
|
|
165
|
+
if self.flow.params:
|
|
166
|
+
params_str = self._format_dict(self.flow.params, indent=1)
|
|
167
|
+
params_line = f"\n params={params_str},"
|
|
168
|
+
|
|
169
|
+
dag_definition = f'''dag = DAG(
|
|
170
|
+
'{self.dag_id}',
|
|
171
|
+
default_args={default_args_str},
|
|
172
|
+
description='{description}',
|
|
173
|
+
schedule_interval={repr(schedule_interval)},
|
|
174
|
+
catchup=False,
|
|
175
|
+
tags={tags_str},{params_line}
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
'''
|
|
179
|
+
|
|
180
|
+
# Common 설정 - ConfigMap 볼륨/마운트 추가
|
|
181
|
+
if not self.flow.repo:
|
|
182
|
+
# ConfigMap 볼륨/마운트를 common에 직접 포함 (값을 직접 인라인)
|
|
183
|
+
configmap_volume = f"k8s.V1Volume(\n name='code-volume',\n config_map=k8s.V1ConfigMapVolumeSource(name='{self.configmap_name}')\n )"
|
|
184
|
+
configmap_volume_mount = f"k8s.V1VolumeMount(\n name='code-volume',\n mount_path='{self.configmap_mount_path}',\n read_only=True\n )"
|
|
185
|
+
|
|
186
|
+
common_config_copy = self.common_config.copy()
|
|
187
|
+
|
|
188
|
+
# 기존 volumes가 있으면 configmap_volume 추가, 없으면 새로 생성
|
|
189
|
+
if 'volumes' in common_config_copy:
|
|
190
|
+
existing_volumes_list = self._format_volumes_list(common_config_copy['volumes'])
|
|
191
|
+
existing_volumes_list.append(configmap_volume)
|
|
192
|
+
common_config_copy['volumes'] = '[\n ' + ',\n '.join(existing_volumes_list) + '\n ]'
|
|
193
|
+
else:
|
|
194
|
+
common_config_copy['volumes'] = f'[\n {configmap_volume}\n ]'
|
|
195
|
+
|
|
196
|
+
# volume_mounts는 common에 포함하지 않음 (task별로 병합해야 하므로)
|
|
197
|
+
# 대신 base_volume_mounts 변수로 분리
|
|
198
|
+
common_config_str = self._format_dict_with_raw_values(common_config_copy, indent=0)
|
|
199
|
+
common_definition = f'''common = {common_config_str}
|
|
200
|
+
|
|
201
|
+
# Base volume mounts (ConfigMap mount)
|
|
202
|
+
base_volume_mounts = [
|
|
203
|
+
{configmap_volume_mount}
|
|
204
|
+
]
|
|
205
|
+
|
|
206
|
+
'''
|
|
207
|
+
else:
|
|
208
|
+
common_config_str = self._format_dict(self.common_config, indent=0)
|
|
209
|
+
common_definition = f'''common = {common_config_str}
|
|
210
|
+
|
|
211
|
+
'''
|
|
212
|
+
|
|
213
|
+
# Task 정의들 (일반 workflow tasks만)
|
|
214
|
+
tasks_code = "with dag:\n"
|
|
215
|
+
for i, task in enumerate(self.flow.depend):
|
|
216
|
+
task_code = self._generate_task_operator(task, is_first=(i == 0))
|
|
217
|
+
tasks_code += task_code + "\n"
|
|
218
|
+
|
|
219
|
+
# Failure handler task 추가 (trigger_rule='one_failed')
|
|
220
|
+
if self.flow.on_failure:
|
|
221
|
+
failure_task_code = self._generate_task_operator(
|
|
222
|
+
self.flow.on_failure,
|
|
223
|
+
is_first=True,
|
|
224
|
+
trigger_rule='one_failed'
|
|
225
|
+
)
|
|
226
|
+
tasks_code += failure_task_code + "\n"
|
|
227
|
+
|
|
228
|
+
# Task 의존성
|
|
229
|
+
dependencies = self._generate_dependencies()
|
|
230
|
+
if dependencies:
|
|
231
|
+
tasks_code += " # Task dependencies\n"
|
|
232
|
+
for dep in dependencies:
|
|
233
|
+
tasks_code += f" {dep}\n"
|
|
234
|
+
|
|
235
|
+
# Failure handler 의존성 추가
|
|
236
|
+
if self.flow.on_failure:
|
|
237
|
+
last_tasks = self._find_last_tasks()
|
|
238
|
+
if last_tasks:
|
|
239
|
+
tasks_code += f" # Failure handler dependency\n"
|
|
240
|
+
tasks_code += f" [{', '.join(last_tasks)}] >> {self.flow.on_failure.name}\n"
|
|
241
|
+
|
|
242
|
+
return header + dag_definition + common_definition + tasks_code
|
|
243
|
+
|
|
244
|
+
def _generate_task_operator(self, task, is_first=False, trigger_rule=None):
|
|
245
|
+
"""KubernetesPodOperator 생성"""
|
|
246
|
+
input_commands = self._generate_input_commands(task, is_first)
|
|
247
|
+
|
|
248
|
+
# 커스텀 CLI 인자를 명령줄에 추가
|
|
249
|
+
custom_args_str = self._build_custom_cli_args()
|
|
250
|
+
|
|
251
|
+
arguments = f'''
|
|
252
|
+
{{SETUP_SCRIPT}}
|
|
253
|
+
{input_commands}
|
|
254
|
+
cd /app && python3 -u {self.workflow_filename} --env {self.current_env} --id {{{{{{{{ dag_run.run_id }}}}}}}} --task {task.name}{custom_args_str} --input /app/input --output /airflow/xcom/return.json
|
|
255
|
+
'''
|
|
256
|
+
|
|
257
|
+
task_image = task.image if hasattr(task, 'image') and task.image else self.k8s_image
|
|
258
|
+
|
|
259
|
+
tolerations_code = self._build_tolerations_code(task)
|
|
260
|
+
node_selector_code = self._build_node_selector_code(task)
|
|
261
|
+
volume_mounts_code = self._build_volume_mounts_code(task)
|
|
262
|
+
container_resources_code = self._build_container_resources_code(task)
|
|
263
|
+
|
|
264
|
+
# trigger_rule 추가
|
|
265
|
+
trigger_rule_code = f"\n trigger_rule='{trigger_rule}'," if trigger_rule else ""
|
|
266
|
+
|
|
267
|
+
# failure handler는 retries=0 설정
|
|
268
|
+
retries_code = "\n retries=0," if trigger_rule == 'one_failed' else ""
|
|
269
|
+
|
|
270
|
+
operator_code = f''' {task.name} = KubernetesPodOperator(
|
|
271
|
+
**common,
|
|
272
|
+
task_id='{task.name}',
|
|
273
|
+
image='{task_image}',{trigger_rule_code}{retries_code}{tolerations_code}{node_selector_code}{volume_mounts_code}{container_resources_code}
|
|
274
|
+
arguments=[
|
|
275
|
+
f\'\'\'{arguments}\'\'\'
|
|
276
|
+
],
|
|
277
|
+
)'''
|
|
278
|
+
|
|
279
|
+
return operator_code
|
|
280
|
+
|
|
281
|
+
def _generate_input_commands(self, task, is_first):
|
|
282
|
+
"""Input 데이터 명령어 생성"""
|
|
283
|
+
if is_first:
|
|
284
|
+
return "echo '{{{{ params | tojson }}}}' >> /app/input/0.json"
|
|
285
|
+
if not task.depend:
|
|
286
|
+
return "echo '{{}}' >> /app/input/0.json"
|
|
287
|
+
|
|
288
|
+
commands = [f"echo '{{{{{{{{ ti.xcom_pull(task_ids=\"{dep.name}\") | tojson }}}}}}}}' >> /app/input/{i}.json"
|
|
289
|
+
for i, dep in enumerate(task.depend)]
|
|
290
|
+
return "\n ".join(commands)
|
|
291
|
+
|
|
292
|
+
def _build_tolerations_code(self, task):
|
|
293
|
+
"""Tolerations 코드 생성"""
|
|
294
|
+
if not (hasattr(task, 'tolerations') and task.tolerations):
|
|
295
|
+
return ""
|
|
296
|
+
|
|
297
|
+
tolerations_items = []
|
|
298
|
+
for tol in task.tolerations:
|
|
299
|
+
tol_params = [f"{attr}='{getattr(tol, attr)}'"
|
|
300
|
+
for attr in ['key', 'operator', 'value', 'effect']
|
|
301
|
+
if hasattr(tol, attr) and getattr(tol, attr)]
|
|
302
|
+
if tol_params:
|
|
303
|
+
tolerations_items.append(f"k8s.V1Toleration({', '.join(tol_params)})")
|
|
304
|
+
|
|
305
|
+
return f"\n tolerations=[{', '.join(tolerations_items)}]," if tolerations_items else ""
|
|
306
|
+
|
|
307
|
+
def _build_node_selector_code(self, task):
|
|
308
|
+
"""Node selector 코드 생성"""
|
|
309
|
+
if hasattr(task, 'node_selector') and task.node_selector:
|
|
310
|
+
return f"\n node_selector={repr(task.node_selector)},"
|
|
311
|
+
return ""
|
|
312
|
+
|
|
313
|
+
def _build_volume_mounts_code(self, task, include_base=True):
|
|
314
|
+
"""Volume mounts 코드 생성
|
|
315
|
+
|
|
316
|
+
Task별 volume_mounts가 있으면 base_volume_mounts와 병합.
|
|
317
|
+
ConfigMap 마운트(code-volume)는 항상 포함되어야 함.
|
|
318
|
+
|
|
319
|
+
Args:
|
|
320
|
+
task: TaskDecorator
|
|
321
|
+
include_base: base_volume_mounts를 포함할지 여부 (repo가 없을 때 True)
|
|
322
|
+
"""
|
|
323
|
+
volume_mounts_items = []
|
|
324
|
+
if hasattr(task, 'volume_mounts') and task.volume_mounts:
|
|
325
|
+
for vm in task.volume_mounts:
|
|
326
|
+
vm_params = []
|
|
327
|
+
if hasattr(vm, 'name') and vm.name:
|
|
328
|
+
vm_params.append(f"name='{vm.name}'")
|
|
329
|
+
if hasattr(vm, 'mount_path') and vm.mount_path:
|
|
330
|
+
vm_params.append(f"mount_path='{vm.mount_path}'")
|
|
331
|
+
if hasattr(vm, 'readOnly'):
|
|
332
|
+
vm_params.append(f"read_only={vm.readOnly}")
|
|
333
|
+
if vm_params:
|
|
334
|
+
volume_mounts_items.append(f"k8s.V1VolumeMount({', '.join(vm_params)})")
|
|
335
|
+
|
|
336
|
+
# ConfigMap 마운트와 병합 (repo가 없을 때)
|
|
337
|
+
if not self.flow.repo:
|
|
338
|
+
if volume_mounts_items:
|
|
339
|
+
return f"\n volume_mounts=base_volume_mounts + [{', '.join(volume_mounts_items)}],"
|
|
340
|
+
else:
|
|
341
|
+
return f"\n volume_mounts=base_volume_mounts,"
|
|
342
|
+
|
|
343
|
+
# repo가 있을 때는 task별 volume_mounts만 사용
|
|
344
|
+
return f"\n volume_mounts=[{', '.join(volume_mounts_items)}]," if volume_mounts_items else ""
|
|
345
|
+
|
|
346
|
+
def _build_custom_cli_args(self):
|
|
347
|
+
"""커스텀 CLI 인자를 명령줄 문자열로 변환
|
|
348
|
+
|
|
349
|
+
예: {'mode': 'prod', 'debug': True} → ' --mode prod --debug'
|
|
350
|
+
"""
|
|
351
|
+
if not self.custom_cli_args:
|
|
352
|
+
return ""
|
|
353
|
+
|
|
354
|
+
args_parts = []
|
|
355
|
+
for key, value in self.custom_cli_args.items():
|
|
356
|
+
# key를 CLI 형식으로 변환 (underscore → dash)
|
|
357
|
+
cli_key = key.replace('_', '-')
|
|
358
|
+
|
|
359
|
+
if isinstance(value, bool):
|
|
360
|
+
if value:
|
|
361
|
+
args_parts.append(f"--{cli_key}")
|
|
362
|
+
else:
|
|
363
|
+
args_parts.append(f"--{cli_key} {value}")
|
|
364
|
+
|
|
365
|
+
return ' ' + ' '.join(args_parts) if args_parts else ""
|
|
366
|
+
|
|
367
|
+
def _build_container_resources_code(self, task):
|
|
368
|
+
"""Container resources 코드 생성"""
|
|
369
|
+
resource_attrs = ['request_cpu', 'request_memory', 'limit_cpu', 'limit_memory', 'limit_gpu']
|
|
370
|
+
if not any(hasattr(task, attr) for attr in resource_attrs):
|
|
371
|
+
return ""
|
|
372
|
+
|
|
373
|
+
requests = {}
|
|
374
|
+
limits = {}
|
|
375
|
+
if hasattr(task, 'request_cpu') and task.request_cpu:
|
|
376
|
+
requests['cpu'] = task.request_cpu
|
|
377
|
+
if hasattr(task, 'request_memory') and task.request_memory:
|
|
378
|
+
requests['memory'] = task.request_memory
|
|
379
|
+
if hasattr(task, 'limit_cpu') and task.limit_cpu:
|
|
380
|
+
limits['cpu'] = task.limit_cpu
|
|
381
|
+
if hasattr(task, 'limit_memory') and task.limit_memory:
|
|
382
|
+
limits['memory'] = task.limit_memory
|
|
383
|
+
if hasattr(task, 'limit_gpu') and task.limit_gpu:
|
|
384
|
+
limits['nvidia.com/gpu'] = task.limit_gpu
|
|
385
|
+
|
|
386
|
+
resource_requirements = f"k8s.V1ResourceRequirements(requests={repr(requests)}, limits={repr(limits)})"
|
|
387
|
+
return f"\n container_resources={resource_requirements},"
|
|
388
|
+
|
|
389
|
+
def _encode_workflow(self):
|
|
390
|
+
"""전체 workflow.py 코드를 base64로 인코딩"""
|
|
391
|
+
import sys
|
|
392
|
+
workflow_file = sys.modules[self.flow.func.__module__].__file__
|
|
393
|
+
with open(workflow_file, 'r', encoding='utf-8') as f:
|
|
394
|
+
workflow_code = f.read()
|
|
395
|
+
return base64.b64encode(workflow_code.encode()).decode()
|
|
396
|
+
|
|
397
|
+
def _encode_package(self, package_path):
|
|
398
|
+
"""패키지를 tar.gz로 압축 후 base64 인코딩 (__pycache__ 등 제외)"""
|
|
399
|
+
import tarfile
|
|
400
|
+
import io
|
|
401
|
+
|
|
402
|
+
exclude_patterns = {
|
|
403
|
+
'__pycache__', '.pyc', '.pyo', '.pyd', '.so', '.git', '.gitignore',
|
|
404
|
+
'.pytest_cache', '.mypy_cache', '.coverage', '.DS_Store', '.egg-info', 'dist', 'build'
|
|
405
|
+
}
|
|
406
|
+
exclude_extensions = ('.pyc', '.pyo', '.pyd', '.so')
|
|
407
|
+
|
|
408
|
+
def should_exclude(name):
|
|
409
|
+
"""파일/디렉토리를 제외할지 판단"""
|
|
410
|
+
parts = Path(name).parts
|
|
411
|
+
return any(part in exclude_patterns or part.endswith(exclude_extensions) for part in parts)
|
|
412
|
+
|
|
413
|
+
tar_buffer = io.BytesIO()
|
|
414
|
+
with tarfile.open(fileobj=tar_buffer, mode='w:gz') as tar:
|
|
415
|
+
package_name = os.path.basename(package_path)
|
|
416
|
+
for root, dirs, files in os.walk(package_path):
|
|
417
|
+
dirs[:] = [d for d in dirs if not should_exclude(os.path.join(root, d))]
|
|
418
|
+
for file in files:
|
|
419
|
+
file_path = os.path.join(root, file)
|
|
420
|
+
if not should_exclude(file_path):
|
|
421
|
+
arcname = os.path.join(package_name, os.path.relpath(file_path, package_path))
|
|
422
|
+
tar.add(file_path, arcname=arcname)
|
|
423
|
+
|
|
424
|
+
tar_buffer.seek(0)
|
|
425
|
+
compressed_size = len(tar_buffer.getvalue())
|
|
426
|
+
encoded = base64.b64encode(tar_buffer.read()).decode()
|
|
427
|
+
print(f'📦 Package compressed: {compressed_size/1024:.2f} KB → Base64: {len(encoded)/1024:.2f} KB')
|
|
428
|
+
return encoded
|
|
429
|
+
|
|
430
|
+
def _find_last_tasks(self):
|
|
431
|
+
"""마지막 task들 (다른 task의 dependency가 아닌 task들) 찾기"""
|
|
432
|
+
# 모든 일반 task (flow.depend는 on_failure를 포함하지 않음)
|
|
433
|
+
all_tasks = {task.name for task in self.flow.depend}
|
|
434
|
+
|
|
435
|
+
# dependency로 사용되는 task들 수집
|
|
436
|
+
dependency_tasks = {
|
|
437
|
+
dep.name
|
|
438
|
+
for task in self.flow.depend
|
|
439
|
+
if task.depend
|
|
440
|
+
for dep in task.depend
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
# 마지막 task들 = 전체 - dependency로 사용되는 것들
|
|
444
|
+
return sorted(all_tasks - dependency_tasks)
|
|
445
|
+
|
|
446
|
+
def _generate_dependencies(self):
|
|
447
|
+
"""Task 의존성 생성"""
|
|
448
|
+
dependencies = []
|
|
449
|
+
for task in self.flow.depend:
|
|
450
|
+
if task.depend:
|
|
451
|
+
dep_names = [dep.name for dep in task.depend]
|
|
452
|
+
if len(dep_names) == 1:
|
|
453
|
+
dependencies.append(f"{dep_names[0]} >> {task.name}")
|
|
454
|
+
else:
|
|
455
|
+
deps_str = ', '.join(dep_names)
|
|
456
|
+
dependencies.append(f"[{deps_str}] >> {task.name}")
|
|
457
|
+
return dependencies
|
|
458
|
+
|
|
459
|
+
def _format_dict(self, d, indent=0, base_indent=0):
|
|
460
|
+
"""Dict를 Python 코드 형식으로 포맷팅"""
|
|
461
|
+
return self._format_dict_with_raw_values(d, indent, raw_keys=set(), base_indent=base_indent)
|
|
462
|
+
|
|
463
|
+
def _format_dict_with_raw_values(self, d, indent=0, raw_keys=None, base_indent=0):
|
|
464
|
+
"""Dict를 Python 코드 형식으로 포맷팅
|
|
465
|
+
|
|
466
|
+
Args:
|
|
467
|
+
d: 포맷팅할 dict
|
|
468
|
+
indent: 들여쓰기 레벨
|
|
469
|
+
raw_keys: 값을 문자열로 감싸지 않고 그대로 출력할 키들 (volumes, volume_mounts 등)
|
|
470
|
+
base_indent: 기본 들여쓰기 (DAG 정의 내부에서 사용 시)
|
|
471
|
+
"""
|
|
472
|
+
if not d:
|
|
473
|
+
return "{}"
|
|
474
|
+
|
|
475
|
+
if raw_keys is None:
|
|
476
|
+
raw_keys = {'volumes', 'volume_mounts'}
|
|
477
|
+
|
|
478
|
+
items = []
|
|
479
|
+
for key, value in d.items():
|
|
480
|
+
# raw_keys에 해당하는 키는 값을 그대로 출력 (이미 코드 문자열인 경우)
|
|
481
|
+
if key in raw_keys and isinstance(value, str):
|
|
482
|
+
items.append(f"'{key}': {value}")
|
|
483
|
+
elif isinstance(value, str):
|
|
484
|
+
items.append(f"'{key}': '{value}'")
|
|
485
|
+
elif isinstance(value, datetime):
|
|
486
|
+
items.append(f"'{key}': datetime({value.year}, {value.month}, {value.day})")
|
|
487
|
+
elif isinstance(value, timedelta):
|
|
488
|
+
items.append(f"'{key}': timedelta(minutes={int(value.total_seconds() / 60)})")
|
|
489
|
+
elif isinstance(value, bool):
|
|
490
|
+
items.append(f"'{key}': {value}")
|
|
491
|
+
elif isinstance(value, (int, float)):
|
|
492
|
+
items.append(f"'{key}': {value}")
|
|
493
|
+
elif isinstance(value, dict):
|
|
494
|
+
items.append(f"'{key}': {self._format_dict_with_raw_values(value, indent + 1, raw_keys, base_indent)}")
|
|
495
|
+
elif isinstance(value, list):
|
|
496
|
+
# volumes 리스트를 특별 처리
|
|
497
|
+
if key == 'volumes':
|
|
498
|
+
volumes_str = self._format_volumes(value)
|
|
499
|
+
items.append(f"'{key}': {volumes_str}")
|
|
500
|
+
else:
|
|
501
|
+
items.append(f"'{key}': {value}")
|
|
502
|
+
else:
|
|
503
|
+
items.append(f"'{key}': {repr(value)}")
|
|
504
|
+
|
|
505
|
+
total_indent = base_indent + indent
|
|
506
|
+
indent_str = ' ' * total_indent
|
|
507
|
+
if len(items) <= 2:
|
|
508
|
+
return '{' + ', '.join(items) + '}'
|
|
509
|
+
else:
|
|
510
|
+
return '{\n' + indent_str + ' ' + f',\n{indent_str} '.join(items) + '\n' + indent_str + '}'
|
|
511
|
+
|
|
512
|
+
def _format_volumes(self, volumes_list):
|
|
513
|
+
"""Volumes 리스트를 k8s.V1Volume 형태로 포맷팅 (한 줄)"""
|
|
514
|
+
volumes_items = []
|
|
515
|
+
for vol in volumes_list:
|
|
516
|
+
if isinstance(vol, dict) and 'name' in vol and 'persistent_volume_claim' in vol:
|
|
517
|
+
pvc_claim = f"k8s.V1PersistentVolumeClaimVolumeSource(claim_name='{vol['persistent_volume_claim']}')"
|
|
518
|
+
volumes_items.append(f"k8s.V1Volume(name='{vol['name']}', persistent_volume_claim={pvc_claim})")
|
|
519
|
+
|
|
520
|
+
if len(volumes_items) == 0:
|
|
521
|
+
return "[]"
|
|
522
|
+
elif len(volumes_items) == 1:
|
|
523
|
+
return f"[{volumes_items[0]}]"
|
|
524
|
+
else:
|
|
525
|
+
return "[\n " + ",\n ".join(volumes_items) + "\n ]"
|
|
526
|
+
|
|
527
|
+
def _format_volumes_list(self, volumes_list):
|
|
528
|
+
"""Volumes 리스트를 k8s.V1Volume 문자열 리스트로 반환 (여러 줄 포맷)"""
|
|
529
|
+
volumes_items = []
|
|
530
|
+
for vol in volumes_list:
|
|
531
|
+
if isinstance(vol, dict) and 'name' in vol and 'persistent_volume_claim' in vol:
|
|
532
|
+
volumes_items.append(f"""k8s.V1Volume(
|
|
533
|
+
name='{vol['name']}',
|
|
534
|
+
persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
|
|
535
|
+
claim_name='{vol['persistent_volume_claim']}'
|
|
536
|
+
)
|
|
537
|
+
)""")
|
|
538
|
+
return volumes_items
|
|
539
|
+
|
|
540
|
+
def _generate_configmap_yaml(self, output_dir):
    """Write the ConfigMap manifest that carries the workflow code.

    The workflow source is stored under ``data`` and every extra package is
    stored under ``binaryData`` as a base64-encoded tar.gz archive. A warning
    is printed when the payload exceeds 1MB; the manifest is still written
    as a single ConfigMap (no splitting is performed).

    Args:
        output_dir: Directory in which ``<configmap_name>.yaml`` is created.

    Returns:
        Path of the written YAML file.

    Raises:
        FileNotFoundError: If an entry of ``EXTRA_PACKAGES`` does not exist.
    """
    import yaml

    configmap_path = os.path.join(output_dir, f'{self.configmap_name}.yaml')

    # Read the workflow source.
    workflow_file = sys.modules[self.flow.func.__module__].__file__
    with open(workflow_file, 'r', encoding='utf-8') as f:
        workflow_code = f.read()

    data = {
        self.workflow_filename: workflow_code
    }

    # Extra packages are shipped as tar.gz archives in binaryData.
    binary_data = {}
    for pkg_path in self.extra_packages or []:
        if not os.path.exists(pkg_path):
            raise FileNotFoundError(
                f"❌ EXTRA_PACKAGES에 지정된 패키지를 찾을 수 없습니다: {pkg_path}\n"
                f" 현재 작업 디렉토리: {os.getcwd()}\n"
                f" 절대 경로로 지정하거나, workflow 파일과 같은 위치에 패키지가 있는지 확인하세요."
            )
        pkg_tar_name = f'{os.path.basename(pkg_path)}.tar.gz'
        binary_data[pkg_tar_name] = self._encode_package(pkg_path)

    configmap = {
        'apiVersion': 'v1',
        'kind': 'ConfigMap',
        'metadata': {
            'name': self.configmap_name,
            'namespace': self.k8s_namespace,
            'labels': {
                'app': self.dag_id,
                'managed-by': 'flowhub'
            }
        },
        'data': data
    }

    if binary_data:
        configmap['binaryData'] = binary_data

    with open(configmap_path, 'w', encoding='utf-8') as f:
        yaml.dump(configmap, f, default_flow_style=False, allow_unicode=True)

    # Size warning. The source is measured in UTF-8 bytes rather than
    # characters, so non-ASCII text is not under-counted.
    total_size = len(workflow_code.encode('utf-8'))
    total_size += sum(len(value) for value in binary_data.values())

    if total_size > 1024 * 1024:  # 1MB
        print(f'⚠️ Warning: ConfigMap size ({total_size / 1024:.1f}KB) exceeds 1MB limit.')
        print(f' Consider using git repo option or splitting into multiple ConfigMaps.')

    return configmap_path
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .storage import Storage
|
|
2
|
+
from .local_storage import LocalStorage
|
|
3
|
+
from .s3_storage import S3Storage
|
|
4
|
+
|
|
5
|
+
def get_storage(env) -> Storage:
    """Pick the storage backend configured in ``env``.

    ``BUCKET`` selects ``S3Storage`` and takes precedence over ``WORKSPACE``,
    which selects ``LocalStorage``.

    Raises:
        ValueError: If ``env`` contains neither ``BUCKET`` nor ``WORKSPACE``.
    """
    if 'BUCKET' in env:
        return S3Storage(env)
    if 'WORKSPACE' in env:
        return LocalStorage(env)
    # ValueError is still caught by callers that catch Exception.
    raise ValueError('BUCKET or WORKSPACE is required')
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
|
|
4
|
+
from .storage import Storage
|
|
5
|
+
|
|
6
|
+
class LocalStorage(Storage):
    """Storage backend that keeps files in a local workspace directory.

    Uploaded files are addressed with URLs of the form
    ``s3://workspace/<path on disk>``; ``download`` accepts the same form.
    """

    # Scheme-like prefix put in front of the on-disk path by ``upload``.
    _URL_PREFIX = 's3://workspace/'

    def __init__(self, env):
        """Store the ``WORKSPACE`` directory from ``env``."""
        super().__init__(env)
        self.workspace = env['WORKSPACE']

    def upload(self, file_path):
        """Copy ``file_path`` into the workspace and return its URL.

        The destination is ``<workspace>/<get_key(file name)>``; missing
        parent directories are created.
        """
        filename = os.path.basename(file_path)
        key = f"{self.workspace}/{self.get_key(filename)}"
        parent_dir = os.path.dirname(key)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        key = shutil.copy2(file_path, key)
        return f'{self._URL_PREFIX}{key}'

    def download(self, s3_url, local_path):
        """Copy the file behind ``s3_url`` into ``local_path`` and return the copy's path.

        Only a leading ``s3://workspace/`` is stripped from the URL. The
        destination directory is created when it does not exist yet.
        """
        prefix = self._URL_PREFIX
        key = s3_url[len(prefix):] if s3_url.startswith(prefix) else s3_url
        filename = os.path.basename(key)
        if local_path:
            os.makedirs(local_path, exist_ok=True)
        dest = f'{local_path}/{filename}'
        shutil.copy2(key, dest)
        return dest
|