codeflowhub 0.1.3__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/PKG-INFO +1 -1
  2. codeflowhub-0.2.0/codeflowhub/airflow/xcom.py +93 -0
  3. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/flow.py +22 -2
  4. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/service/airflow_exporter.py +34 -4
  5. codeflowhub-0.2.0/codeflowhub/template/transcript_pkg/__init__.py +0 -0
  6. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub.egg-info/PKG-INFO +1 -1
  7. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub.egg-info/SOURCES.txt +2 -0
  8. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/setup.py +1 -1
  9. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/LICENSE +0 -0
  10. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/README.md +0 -0
  11. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/__init__.py +0 -0
  12. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/action.py +0 -0
  13. {codeflowhub-0.1.3/codeflowhub/template/analyze_speaker_pkg → codeflowhub-0.2.0/codeflowhub/airflow}/__init__.py +0 -0
  14. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/base.py +0 -0
  15. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/model.py +0 -0
  16. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/service/__init__.py +0 -0
  17. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/storage/__init__.py +0 -0
  18. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/storage/local_storage.py +0 -0
  19. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/storage/s3_storage.py +0 -0
  20. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/storage/storage.py +0 -0
  21. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/task.py +0 -0
  22. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/template/__init__.py +0 -0
  23. {codeflowhub-0.1.3/codeflowhub/template/extract_voice_pkg → codeflowhub-0.2.0/codeflowhub/template/analyze_speaker_pkg}/__init__.py +0 -0
  24. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/template/analyze_speaker_pkg/main.py +0 -0
  25. {codeflowhub-0.1.3/codeflowhub/template/read_pdf_pkg → codeflowhub-0.2.0/codeflowhub/template/extract_voice_pkg}/__init__.py +0 -0
  26. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/template/extract_voice_pkg/main.py +0 -0
  27. {codeflowhub-0.1.3/codeflowhub/template/transcript_pkg → codeflowhub-0.2.0/codeflowhub/template/read_pdf_pkg}/__init__.py +0 -0
  28. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/template/read_pdf_pkg/main.py +0 -0
  29. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub/template/transcript_pkg/main.py +0 -0
  30. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub.egg-info/dependency_links.txt +0 -0
  31. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/codeflowhub.egg-info/top_level.txt +0 -0
  32. {codeflowhub-0.1.3 → codeflowhub-0.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeflowhub
3
- Version: 0.1.3
3
+ Version: 0.2.0
4
4
  Summary: workflow development tools
5
5
  Author: creaddiscans
6
6
  Author-email: creaddiscans@gmail.com
@@ -0,0 +1,93 @@
1
+ """Airflow XCom REST API client for fetching task outputs at runtime.
2
+
3
+ Used inside KubernetesPodOperator containers to fetch upstream task xcom data
4
+ via Airflow REST API instead of inlining it in shell arguments (ARG_MAX safe).
5
+
6
+ Environment variables (injected via Airflow Connection):
7
+ - FLOWHUB_API_URL: Airflow webserver URL (e.g. http://airflow-web:8080)
8
+ - FLOWHUB_API_USER: Basic auth user
9
+ - FLOWHUB_API_PASS: Basic auth password
10
+
11
+ Usage:
12
+ python3 -m codeflowhub.airflow.xcom <dag_id> <run_id> <task_id> <output_path>
13
+ """
14
+ import urllib.request
15
+ import json
16
+ import base64
17
+ import os
18
+ import sys
19
+ from urllib.parse import quote
20
+
21
+
22
def fetch_xcom(dag_id: str, run_id: str, task_id: str, output_path: str):
    """Fetch a task's ``return_value`` XCom via the Airflow REST API and save it to a file.

    Connection settings are read from the FLOWHUB_API_URL, FLOWHUB_API_USER and
    FLOWHUB_API_PASS environment variables. Every handled failure is reported
    on stderr and ends the process with exit status 1.

    Args:
        dag_id: Airflow DAG ID
        run_id: DAG Run ID
        task_id: upstream task ID
        output_path: path to save the JSON output (parent directories are created)

    Raises:
        SystemExit: with code 1 when a required environment variable is missing,
            the request fails, or the response is not a JSON object with a
            ``value`` key.
    """
    # urllib.error is only loaded as a side effect of importing urllib.request;
    # import it explicitly because its exception classes are referenced below.
    import urllib.error

    required_vars = ['FLOWHUB_API_URL', 'FLOWHUB_API_USER', 'FLOWHUB_API_PASS']
    missing = [v for v in required_vars if v not in os.environ]
    if missing:
        print(f"Error: missing environment variables: {', '.join(missing)}", file=sys.stderr)
        print("These should be injected via Airflow Connection (FLOWHUB_API_URL, FLOWHUB_API_USER, FLOWHUB_API_PASS)", file=sys.stderr)
        sys.exit(1)

    base_url = os.environ['FLOWHUB_API_URL']
    credentials = os.environ['FLOWHUB_API_USER'] + ':' + os.environ['FLOWHUB_API_PASS']
    auth = base64.b64encode(credentials.encode()).decode()

    # safe='' also percent-encodes '/', so an ID can never add a path segment.
    url = (
        base_url.rstrip('/')
        + '/api/v1/dags/' + quote(dag_id, safe='')
        + '/dagRuns/' + quote(run_id, safe='')
        + '/taskInstances/' + quote(task_id, safe='')
        + '/xcomEntries/return_value'
    )

    req = urllib.request.Request(
        url,
        headers={
            'Authorization': 'Basic ' + auth,
            'Accept': 'application/json',
        },
    )

    try:
        # The context manager closes the response even if read() fails.
        with urllib.request.urlopen(req) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        if e.code == 401:
            print(f"Error: authentication failed for {url}", file=sys.stderr)
            print("Check FLOWHUB_API_USER / FLOWHUB_API_PASS in Airflow Connection", file=sys.stderr)
        elif e.code == 404:
            print(f"Error: xcom not found for task '{task_id}' in dag '{dag_id}' run '{run_id}'", file=sys.stderr)
            print("Ensure the upstream task completed successfully and pushed xcom", file=sys.stderr)
        else:
            print(f"Error: HTTP {e.code} from {url}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"Error: cannot reach Airflow API at {base_url}: {e.reason}", file=sys.stderr)
        sys.exit(1)

    try:
        value = json.loads(raw)['value']
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not valid JSON (or not decodable text);
        # KeyError/TypeError: JSON without a top-level 'value' entry.
        print(f"Error: unexpected response from {url}: {e!r}", file=sys.stderr)
        sys.exit(1)

    # value can be a string (already JSON) or a parsed object
    content = value if isinstance(value, str) else json.dumps(value)

    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    # Write UTF-8 explicitly so non-ASCII payloads do not depend on the locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
86
+
87
+
88
if __name__ == '__main__':
    # CLI entry point: exactly four positional arguments are expected after
    # the module name (dag_id, run_id, task_id, output_path).
    cli_args = sys.argv[1:]
    if len(cli_args) != 4:
        print("Usage: python3 -m codeflowhub.airflow.xcom <dag_id> <run_id> <task_id> <output_path>", file=sys.stderr)
        sys.exit(1)

    fetch_xcom(*cli_args)
@@ -23,13 +23,14 @@ class FlowDecorator(BaseDecorator):
23
23
  service_account_name:str # Kubernetes service account name
24
24
  volumes:list # Kubernetes volumes
25
25
  airflow_sidecar_image:str # Airflow XCom sidecar 이미지
26
+ airflow_connection_id:str # Airflow Connection ID for XCom API fetch
26
27
  repo:str # Git repository URL
27
28
  path:str # Git repo 내 작업 경로
28
29
  on_failure: 'BaseDecorator' = None # 모든 task의 기본 failure handler
29
30
 
30
31
  def __init__(self, *args, namespace='default', env=None, name=None, description=None, params=None,
31
32
  tags=None, annotations=None, service_account_name=None, volumes=None,
32
- airflow_sidecar_image=None, repo=None, path=None, on_failure=None, **kwargs):
33
+ airflow_sidecar_image=None, airflow_connection_id=None, repo=None, path=None, on_failure=None, **kwargs):
33
34
  # CLI 속성 먼저 초기화 (init()에서 사용됨)
34
35
  self._cli_export = None
35
36
  self._cli_job_dir = None
@@ -47,6 +48,7 @@ class FlowDecorator(BaseDecorator):
47
48
  self.service_account_name = service_account_name
48
49
  self.volumes = volumes or []
49
50
  self.airflow_sidecar_image = airflow_sidecar_image
51
+ self.airflow_connection_id = airflow_connection_id
50
52
  self.repo = repo
51
53
  self.path = path
52
54
  self.on_failure = on_failure
@@ -509,6 +511,10 @@ class FlowDecorator(BaseDecorator):
509
511
  - task가 실행된 적이 없으면: 의존하는 task들의 output을 병합하여 반환
510
512
  """
511
513
  if not os.path.exists(BaseDecorator.run_log_file):
514
+ # run.json이 없으면 의존성 없는 첫 번째 task인지 확인 후 input.json fallback
515
+ target_task = next((task for task in self.depend if task.name == task_name), None)
516
+ if target_task and not target_task.depend:
517
+ return self._load_input_json_fallback(task_name)
512
518
  raise FileNotFoundError(f'{BaseDecorator.run_log_file} 파일이 없습니다. --input-data 인자를 제공하거나 먼저 전체 workflow를 실행하세요.')
513
519
 
514
520
  run_log = self._load_log_file()
@@ -520,7 +526,8 @@ class FlowDecorator(BaseDecorator):
520
526
  # 실행된 적이 없으면 의존 task들의 output을 병합
521
527
  target_task = next((task for task in self.depend if task.name == task_name), None)
522
528
  if not target_task or not target_task.depend:
523
- raise ValueError(f'Task "{task_name}"의 실행 로그가 없고 의존 task 없습니다. --input-data를 제공하거나 먼저 필요한 task들을 실행하세요.')
529
+ # First task with no dependencies: fall back to input.json
530
+ return self._load_input_json_fallback(task_name)
524
531
 
525
532
  # 의존 task들의 output 수집
526
533
  merged_input = {}
@@ -540,6 +547,19 @@ class FlowDecorator(BaseDecorator):
540
547
 
541
548
  return merged_input
542
549
 
550
def _load_input_json_fallback(self, task_name):
    """Load input data from input.json for a first task that has no dependencies.

    input.json is looked up in the directory that holds
    BaseDecorator.run_log_file.

    Args:
        task_name: name of the task, used only in the error message.

    Returns:
        The parsed content of input.json.

    Raises:
        FileNotFoundError: if input.json does not exist.
    """
    log_dir = os.path.dirname(BaseDecorator.run_log_file)
    input_json_path = os.path.join(log_dir, 'input.json')

    # Guard clause: without input.json there is nothing to fall back to.
    if not os.path.exists(input_json_path):
        raise FileNotFoundError(
            f'Task "{task_name}"의 실행 로그가 없고 {input_json_path}도 없습니다. '
            f'--input-data를 제공하거나 먼저 전체 workflow를 실행하세요.'
        )

    with open(input_json_path, 'r') as f:
        data = json.load(f)
    print(f'📂 Loading input from {input_json_path} (no run.json)')
    return data
562
+
543
563
  def export_airflow(self, output_path=None, schedule_interval=None, start_date=None, default_args=None):
544
564
  """
545
565
  Airflow DAG로 export
@@ -28,6 +28,9 @@ class AirflowExporter:
28
28
  # XCom sidecar image: flow의 airflow_sidecar_image를 우선 사용, 없으면 env에서
29
29
  self.xcom_sidecar_image = flow_decorator.airflow_sidecar_image or self.env.get('XCOM_SIDECAR_IMAGE', None)
30
30
 
31
+ # Airflow Connection ID for XCom API fetch (None이면 기존 heredoc 방식)
32
+ self.airflow_connection_id = flow_decorator.airflow_connection_id
33
+
31
34
  # 추가 패키지 경로들 (env에서 설정 가능)
32
35
  self.extra_packages = self.env.get('EXTRA_PACKAGES', [])
33
36
 
@@ -128,6 +131,18 @@ from datetime import datetime, timedelta
128
131
  from airflow import DAG
129
132
  from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
130
133
  from kubernetes.client import models as k8s
134
+ '''
135
+
136
+ # Airflow Connection for XCom API fetch
137
+ if self.airflow_connection_id:
138
+ header += f'''
139
+ from airflow.hooks.base import BaseHook
140
+ _conn = BaseHook.get_connection('{self.airflow_connection_id}')
141
+ _fh_api_env = [
142
+ k8s.V1EnvVar(name='FLOWHUB_API_URL', value=f"{{_conn.schema}}://{{_conn.host}}:{{_conn.port}}"),
143
+ k8s.V1EnvVar(name='FLOWHUB_API_USER', value=_conn.login),
144
+ k8s.V1EnvVar(name='FLOWHUB_API_PASS', value=_conn.password),
145
+ ]
131
146
  '''
132
147
 
133
148
  # XCom sidecar 설정
@@ -281,10 +296,15 @@ base_volume_mounts = [
281
296
  # failure handler는 retries=0 설정
282
297
  retries_code = "\n retries=0," if trigger_rule == 'one_failed' else ""
283
298
 
299
+ # XCom API fetch를 사용하는 task에 env_vars 주입
300
+ env_vars_code = ""
301
+ if self.airflow_connection_id and not is_first and task.depend:
302
+ env_vars_code = "\n env_vars=_fh_api_env,"
303
+
284
304
  operator_code = f''' {task.name} = KubernetesPodOperator(
285
305
  **common,
286
306
  task_id='{task.name}',
287
- image='{task_image}',{pool_code}{trigger_rule_code}{retries_code}{tolerations_code}{node_selector_code}{volume_mounts_code}{container_resources_code}{sidecars_code}
307
+ image='{task_image}',{pool_code}{trigger_rule_code}{retries_code}{env_vars_code}{tolerations_code}{node_selector_code}{volume_mounts_code}{container_resources_code}{sidecars_code}
288
308
  arguments=[
289
309
  f\'\'\'{arguments}\'\'\'
290
310
  ],
@@ -299,9 +319,19 @@ base_volume_mounts = [
299
319
  if not task.depend:
300
320
  return "echo '{{}}' >> /app/input/0.json"
301
321
 
302
- commands = [
303
- '\n'.join([f"cat << 'EOF' > /app/input/{i}.json", f'{{{{{{{{ ti.xcom_pull(task_ids=\"{dep.name}\") | tojson }}}}}}}}', 'EOF'])
304
- for i, dep in enumerate(task.depend)]
322
+ if self.airflow_connection_id:
323
+ # XCom API fetch 방식 (ARG_MAX safe)
324
+ commands = [
325
+ f'python3 -m codeflowhub.airflow.xcom '
326
+ f'{{{{{{{{ dag.dag_id }}}}}}}} {{{{{{{{ dag_run.run_id }}}}}}}} '
327
+ f'{dep.name} /app/input/{i}.json'
328
+ for i, dep in enumerate(task.depend)
329
+ ]
330
+ else:
331
+ # 기존 heredoc 방식 (하위 호환)
332
+ commands = [
333
+ '\n'.join([f"cat << 'EOF' > /app/input/{i}.json", f'{{{{{{{{ ti.xcom_pull(task_ids=\"{dep.name}\") | tojson }}}}}}}}', 'EOF'])
334
+ for i, dep in enumerate(task.depend)]
305
335
  return "\n ".join(commands)
306
336
 
307
337
  def _build_tolerations_code(self, task):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeflowhub
3
- Version: 0.1.3
3
+ Version: 0.2.0
4
4
  Summary: workflow development tools
5
5
  Author: creaddiscans
6
6
  Author-email: creaddiscans@gmail.com
@@ -11,6 +11,8 @@ codeflowhub.egg-info/PKG-INFO
11
11
  codeflowhub.egg-info/SOURCES.txt
12
12
  codeflowhub.egg-info/dependency_links.txt
13
13
  codeflowhub.egg-info/top_level.txt
14
+ codeflowhub/airflow/__init__.py
15
+ codeflowhub/airflow/xcom.py
14
16
  codeflowhub/service/__init__.py
15
17
  codeflowhub/service/airflow_exporter.py
16
18
  codeflowhub/storage/__init__.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='codeflowhub',
5
- version='0.1.3',
5
+ version='0.2.0',
6
6
  description='workflow development tools',
7
7
  author='creaddiscans',
8
8
  author_email='creaddiscans@gmail.com',
File without changes
File without changes
File without changes