codeflowhub 0.1.4__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/PKG-INFO +1 -1
- codeflowhub-0.2.1/codeflowhub/airflow/xcom.py +140 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/flow.py +3 -1
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/service/airflow_exporter.py +40 -4
- codeflowhub-0.2.1/codeflowhub/template/transcript_pkg/__init__.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub.egg-info/PKG-INFO +1 -1
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub.egg-info/SOURCES.txt +2 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/setup.py +1 -1
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/LICENSE +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/README.md +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/__init__.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/action.py +0 -0
- {codeflowhub-0.1.4/codeflowhub/template/analyze_speaker_pkg → codeflowhub-0.2.1/codeflowhub/airflow}/__init__.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/base.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/model.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/service/__init__.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/storage/__init__.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/storage/local_storage.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/storage/s3_storage.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/storage/storage.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/task.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/template/__init__.py +0 -0
- {codeflowhub-0.1.4/codeflowhub/template/extract_voice_pkg → codeflowhub-0.2.1/codeflowhub/template/analyze_speaker_pkg}/__init__.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/template/analyze_speaker_pkg/main.py +0 -0
- {codeflowhub-0.1.4/codeflowhub/template/read_pdf_pkg → codeflowhub-0.2.1/codeflowhub/template/extract_voice_pkg}/__init__.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/template/extract_voice_pkg/main.py +0 -0
- {codeflowhub-0.1.4/codeflowhub/template/transcript_pkg → codeflowhub-0.2.1/codeflowhub/template/read_pdf_pkg}/__init__.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/template/read_pdf_pkg/main.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub/template/transcript_pkg/main.py +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub.egg-info/dependency_links.txt +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/codeflowhub.egg-info/top_level.txt +0 -0
- {codeflowhub-0.1.4 → codeflowhub-0.2.1}/setup.cfg +0 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Airflow REST API client for fetching task data at runtime.
|
|
2
|
+
|
|
3
|
+
Used inside KubernetesPodOperator containers to fetch xcom/params data
|
|
4
|
+
via Airflow REST API instead of inlining it in shell arguments (ARG_MAX safe).
|
|
5
|
+
|
|
6
|
+
Environment variables (injected via Airflow Connection):
|
|
7
|
+
- FLOWHUB_API_URL: Airflow webserver URL (e.g. http://airflow-web:8080)
|
|
8
|
+
- FLOWHUB_API_USER: Basic auth user
|
|
9
|
+
- FLOWHUB_API_PASS: Basic auth password
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python3 -m codeflowhub.airflow.xcom xcom <dag_id> <run_id> <task_id> <output_path>
|
|
13
|
+
python3 -m codeflowhub.airflow.xcom params <dag_id> <run_id> <output_path>
|
|
14
|
+
"""
|
|
15
|
+
import urllib.request
|
|
16
|
+
import json
|
|
17
|
+
import base64
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
from urllib.parse import quote
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _api_request(path: str, timeout: float = 30.0):
    """Send an authenticated GET request to the Airflow REST API.

    Credentials and the base URL are read from the FLOWHUB_API_URL,
    FLOWHUB_API_USER and FLOWHUB_API_PASS environment variables.

    Args:
        path: API path appended to FLOWHUB_API_URL (e.g. /api/v1/dags/...)
        timeout: socket timeout in seconds for connecting and reading, so an
            unresponsive server cannot block the caller indefinitely

    Returns:
        parsed JSON response body

    The process exits with status 1 (after printing a message to stderr) when
    a required environment variable is missing, the server answers with an
    HTTP error status, or the API cannot be reached.
    """
    # The except clauses below name urllib.error directly; import it
    # explicitly instead of relying on urllib.request importing it for us.
    import urllib.error

    required_vars = ['FLOWHUB_API_URL', 'FLOWHUB_API_USER', 'FLOWHUB_API_PASS']
    missing = [v for v in required_vars if v not in os.environ]
    if missing:
        print(f"Error: missing environment variables: {', '.join(missing)}", file=sys.stderr)
        print("These should be injected via Airflow Connection (FLOWHUB_API_URL, FLOWHUB_API_USER, FLOWHUB_API_PASS)", file=sys.stderr)
        sys.exit(1)

    # HTTP Basic auth: base64("user:password").
    credentials = os.environ['FLOWHUB_API_USER'] + ':' + os.environ['FLOWHUB_API_PASS']
    auth = base64.b64encode(credentials.encode()).decode()

    # Strip trailing slashes so base URL + path never produces "//".
    url = os.environ['FLOWHUB_API_URL'].rstrip('/') + path

    req = urllib.request.Request(
        url,
        headers={
            'Authorization': 'Basic ' + auth,
            'Accept': 'application/json',
        },
    )

    try:
        # The context manager closes the connection even if reading fails.
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            payload = resp.read()
    except urllib.error.HTTPError as e:
        if e.code == 401:
            print(f"Error: authentication failed for {url}", file=sys.stderr)
            print("Check FLOWHUB_API_USER / FLOWHUB_API_PASS in Airflow Connection", file=sys.stderr)
        elif e.code == 404:
            print(f"Error: resource not found: {url}", file=sys.stderr)
        else:
            print(f"Error: HTTP {e.code} from {url}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"Error: cannot reach Airflow API at {os.environ['FLOWHUB_API_URL']}: {e.reason}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Timeouts or resets while reading the body are raised as plain
        # OSError subclasses rather than URLError.
        print(f"Error: cannot reach Airflow API at {os.environ['FLOWHUB_API_URL']}: {e}", file=sys.stderr)
        sys.exit(1)

    return json.loads(payload)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _write_json(content, output_path: str):
    """Write JSON content to a file, creating parent directories as needed.

    Args:
        content: a str is written verbatim (it is not re-encoded as a JSON
            string); any other value is serialized with json.dump
        output_path: destination file path
    """
    # dirname is '' for a bare filename; fall back to the current directory.
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    # Pin UTF-8 so non-ASCII text in a str payload does not depend on the
    # locale's default encoding (which may be ASCII in minimal containers).
    with open(output_path, 'w', encoding='utf-8') as f:
        if isinstance(content, str):
            f.write(content)
        else:
            json.dump(content, f)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def fetch_xcom(dag_id: str, run_id: str, task_id: str, output_path: str):
    """Download a task's ``return_value`` XCom entry and store it in a file.

    Args:
        dag_id: Airflow DAG ID
        run_id: DAG Run ID
        task_id: ID of the upstream task whose XCom is requested
        output_path: file that receives the ``value`` field of the response
    """
    # Percent-encode each identifier completely (safe='') so characters such
    # as '/' or '+' cannot change the structure of the URL path.
    encoded_dag, encoded_run, encoded_task = (
        quote(identifier, safe='') for identifier in (dag_id, run_id, task_id)
    )
    endpoint = (
        f'/api/v1/dags/{encoded_dag}'
        f'/dagRuns/{encoded_run}'
        f'/taskInstances/{encoded_task}'
        '/xcomEntries/return_value'
    )
    response = _api_request(endpoint)
    _write_json(response['value'], output_path)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def fetch_params(dag_id: str, run_id: str, output_path: str):
    """Download a DAG run's ``conf`` and store it in a file.

    Args:
        dag_id: Airflow DAG ID
        run_id: DAG Run ID
        output_path: file that receives the ``conf`` field of the response
            (an empty object when the response has no ``conf`` key)
    """
    # Identifiers are fully percent-encoded (safe='') before being joined
    # into the URL path.
    segments = (
        'dags', quote(dag_id, safe=''),
        'dagRuns', quote(run_id, safe=''),
    )
    endpoint = '/api/v1/' + '/'.join(segments)
    dag_run = _api_request(endpoint)
    conf = dag_run.get('conf', {})
    _write_json(conf, output_path)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
if __name__ == '__main__':
    # Command-line entry point: pick the handler from the sub-command name,
    # check the argument count, then forward the positional arguments.
    xcom_usage = "python3 -m codeflowhub.airflow.xcom xcom <dag_id> <run_id> <task_id> <output_path>"
    params_usage = "python3 -m codeflowhub.airflow.xcom params <dag_id> <run_id> <output_path>"

    if len(sys.argv) < 2:
        print("Usage:", file=sys.stderr)
        print(" " + xcom_usage, file=sys.stderr)
        print(" " + params_usage, file=sys.stderr)
        sys.exit(1)

    command = sys.argv[1]

    # sub-command -> (handler, expected len(sys.argv), usage text)
    dispatch = {
        'xcom': (fetch_xcom, 6, xcom_usage),
        'params': (fetch_params, 5, params_usage),
    }

    if command not in dispatch:
        print(f"Unknown command: {command}", file=sys.stderr)
        print("Available commands: xcom, params", file=sys.stderr)
        sys.exit(1)

    handler, expected_argc, usage = dispatch[command]
    if len(sys.argv) != expected_argc:
        print("Usage: " + usage, file=sys.stderr)
        sys.exit(1)

    handler(*sys.argv[2:])
|
|
@@ -23,13 +23,14 @@ class FlowDecorator(BaseDecorator):
|
|
|
23
23
|
service_account_name:str # Kubernetes service account name
|
|
24
24
|
volumes:list # Kubernetes volumes
|
|
25
25
|
airflow_sidecar_image:str # Airflow XCom sidecar 이미지
|
|
26
|
+
airflow_connection_id:str # Airflow Connection ID for XCom API fetch
|
|
26
27
|
repo:str # Git repository URL
|
|
27
28
|
path:str # Git repo 내 작업 경로
|
|
28
29
|
on_failure: 'BaseDecorator' = None # 모든 task의 기본 failure handler
|
|
29
30
|
|
|
30
31
|
def __init__(self, *args, namespace='default', env=None, name=None, description=None, params=None,
|
|
31
32
|
tags=None, annotations=None, service_account_name=None, volumes=None,
|
|
32
|
-
airflow_sidecar_image=None, repo=None, path=None, on_failure=None, **kwargs):
|
|
33
|
+
airflow_sidecar_image=None, airflow_connection_id=None, repo=None, path=None, on_failure=None, **kwargs):
|
|
33
34
|
# CLI 속성 먼저 초기화 (init()에서 사용됨)
|
|
34
35
|
self._cli_export = None
|
|
35
36
|
self._cli_job_dir = None
|
|
@@ -47,6 +48,7 @@ class FlowDecorator(BaseDecorator):
|
|
|
47
48
|
self.service_account_name = service_account_name
|
|
48
49
|
self.volumes = volumes or []
|
|
49
50
|
self.airflow_sidecar_image = airflow_sidecar_image
|
|
51
|
+
self.airflow_connection_id = airflow_connection_id
|
|
50
52
|
self.repo = repo
|
|
51
53
|
self.path = path
|
|
52
54
|
self.on_failure = on_failure
|
|
@@ -28,6 +28,9 @@ class AirflowExporter:
|
|
|
28
28
|
# XCom sidecar image: flow의 airflow_sidecar_image를 우선 사용, 없으면 env에서
|
|
29
29
|
self.xcom_sidecar_image = flow_decorator.airflow_sidecar_image or self.env.get('XCOM_SIDECAR_IMAGE', None)
|
|
30
30
|
|
|
31
|
+
# Airflow Connection ID for XCom API fetch (None이면 기존 heredoc 방식)
|
|
32
|
+
self.airflow_connection_id = flow_decorator.airflow_connection_id
|
|
33
|
+
|
|
31
34
|
# 추가 패키지 경로들 (env에서 설정 가능)
|
|
32
35
|
self.extra_packages = self.env.get('EXTRA_PACKAGES', [])
|
|
33
36
|
|
|
@@ -128,6 +131,18 @@ from datetime import datetime, timedelta
|
|
|
128
131
|
from airflow import DAG
|
|
129
132
|
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
|
|
130
133
|
from kubernetes.client import models as k8s
|
|
134
|
+
'''
|
|
135
|
+
|
|
136
|
+
# Airflow Connection for XCom API fetch
|
|
137
|
+
if self.airflow_connection_id:
|
|
138
|
+
header += f'''
|
|
139
|
+
from airflow.hooks.base import BaseHook
|
|
140
|
+
_conn = BaseHook.get_connection('{self.airflow_connection_id}')
|
|
141
|
+
_fh_api_env = [
|
|
142
|
+
k8s.V1EnvVar(name='FLOWHUB_API_URL', value=f"{{_conn.schema}}://{{_conn.host}}:{{_conn.port}}"),
|
|
143
|
+
k8s.V1EnvVar(name='FLOWHUB_API_USER', value=_conn.login),
|
|
144
|
+
k8s.V1EnvVar(name='FLOWHUB_API_PASS', value=_conn.password),
|
|
145
|
+
]
|
|
131
146
|
'''
|
|
132
147
|
|
|
133
148
|
# XCom sidecar 설정
|
|
@@ -281,10 +296,15 @@ base_volume_mounts = [
|
|
|
281
296
|
# failure handler는 retries=0 설정
|
|
282
297
|
retries_code = "\n retries=0," if trigger_rule == 'one_failed' else ""
|
|
283
298
|
|
|
299
|
+
# XCom API fetch를 사용하는 task에 env_vars 주입 (is_first: params fetch, depend: xcom fetch)
|
|
300
|
+
env_vars_code = ""
|
|
301
|
+
if self.airflow_connection_id and (is_first or task.depend):
|
|
302
|
+
env_vars_code = "\n env_vars=_fh_api_env,"
|
|
303
|
+
|
|
284
304
|
operator_code = f''' {task.name} = KubernetesPodOperator(
|
|
285
305
|
**common,
|
|
286
306
|
task_id='{task.name}',
|
|
287
|
-
image='{task_image}',{pool_code}{trigger_rule_code}{retries_code}{tolerations_code}{node_selector_code}{volume_mounts_code}{container_resources_code}{sidecars_code}
|
|
307
|
+
image='{task_image}',{pool_code}{trigger_rule_code}{retries_code}{env_vars_code}{tolerations_code}{node_selector_code}{volume_mounts_code}{container_resources_code}{sidecars_code}
|
|
288
308
|
arguments=[
|
|
289
309
|
f\'\'\'{arguments}\'\'\'
|
|
290
310
|
],
|
|
@@ -295,13 +315,29 @@ base_volume_mounts = [
|
|
|
295
315
|
def _generate_input_commands(self, task, is_first):
|
|
296
316
|
"""Input 데이터 명령어 생성"""
|
|
297
317
|
if is_first:
|
|
318
|
+
if self.airflow_connection_id:
|
|
319
|
+
return (
|
|
320
|
+
f'python3 -m codeflowhub.airflow.xcom params '
|
|
321
|
+
f'{{{{{{{{ dag.dag_id }}}}}}}} {{{{{{{{ dag_run.run_id }}}}}}}} '
|
|
322
|
+
f'/app/input/0.json'
|
|
323
|
+
)
|
|
298
324
|
return '\n'.join(["cat << 'EOF' > /app/input/0.json", '{{{{ params | tojson }}}}', 'EOF'])
|
|
299
325
|
if not task.depend:
|
|
300
326
|
return "echo '{{}}' >> /app/input/0.json"
|
|
301
327
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
328
|
+
if self.airflow_connection_id:
|
|
329
|
+
# XCom API fetch 방식 (ARG_MAX safe)
|
|
330
|
+
commands = [
|
|
331
|
+
f'python3 -m codeflowhub.airflow.xcom xcom '
|
|
332
|
+
f'{{{{{{{{ dag.dag_id }}}}}}}} {{{{{{{{ dag_run.run_id }}}}}}}} '
|
|
333
|
+
f'{dep.name} /app/input/{i}.json'
|
|
334
|
+
for i, dep in enumerate(task.depend)
|
|
335
|
+
]
|
|
336
|
+
else:
|
|
337
|
+
# 기존 heredoc 방식 (하위 호환)
|
|
338
|
+
commands = [
|
|
339
|
+
'\n'.join([f"cat << 'EOF' > /app/input/{i}.json", f'{{{{{{{{ ti.xcom_pull(task_ids=\"{dep.name}\") | tojson }}}}}}}}', 'EOF'])
|
|
340
|
+
for i, dep in enumerate(task.depend)]
|
|
305
341
|
return "\n ".join(commands)
|
|
306
342
|
|
|
307
343
|
def _build_tolerations_code(self, task):
|
|
File without changes
|
|
@@ -11,6 +11,8 @@ codeflowhub.egg-info/PKG-INFO
|
|
|
11
11
|
codeflowhub.egg-info/SOURCES.txt
|
|
12
12
|
codeflowhub.egg-info/dependency_links.txt
|
|
13
13
|
codeflowhub.egg-info/top_level.txt
|
|
14
|
+
codeflowhub/airflow/__init__.py
|
|
15
|
+
codeflowhub/airflow/xcom.py
|
|
14
16
|
codeflowhub/service/__init__.py
|
|
15
17
|
codeflowhub/service/airflow_exporter.py
|
|
16
18
|
codeflowhub/storage/__init__.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|