codeflowhub 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/PKG-INFO +1 -1
- codeflowhub-0.2.2/codeflowhub/airflow/xcom.py +140 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/flow.py +5 -1
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/service/airflow_exporter.py +9 -3
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub.egg-info/PKG-INFO +1 -1
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/setup.py +1 -1
- codeflowhub-0.2.0/codeflowhub/airflow/xcom.py +0 -93
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/LICENSE +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/README.md +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/action.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/airflow/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/base.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/model.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/service/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/storage/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/storage/local_storage.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/storage/s3_storage.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/storage/storage.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/task.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/analyze_speaker_pkg/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/analyze_speaker_pkg/main.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/extract_voice_pkg/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/extract_voice_pkg/main.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/read_pdf_pkg/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/read_pdf_pkg/main.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/transcript_pkg/__init__.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/transcript_pkg/main.py +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub.egg-info/SOURCES.txt +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub.egg-info/dependency_links.txt +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub.egg-info/top_level.txt +0 -0
- {codeflowhub-0.2.0 → codeflowhub-0.2.2}/setup.cfg +0 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Airflow REST API client for fetching task data at runtime.
|
|
2
|
+
|
|
3
|
+
Used inside KubernetesPodOperator containers to fetch xcom/params data
|
|
4
|
+
via Airflow REST API instead of inlining it in shell arguments (ARG_MAX safe).
|
|
5
|
+
|
|
6
|
+
Environment variables (injected via Airflow Connection):
|
|
7
|
+
- FLOWHUB_API_URL: Airflow webserver URL (e.g. http://airflow-web:8080)
|
|
8
|
+
- FLOWHUB_API_USER: Basic auth user
|
|
9
|
+
- FLOWHUB_API_PASS: Basic auth password
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python3 -m codeflowhub.airflow.xcom xcom <dag_id> <run_id> <task_id> <output_path>
|
|
13
|
+
python3 -m codeflowhub.airflow.xcom params <dag_id> <run_id> <output_path>
|
|
14
|
+
"""
|
|
15
|
+
import urllib.request
|
|
16
|
+
import json
|
|
17
|
+
import base64
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
from urllib.parse import quote
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _api_request(path: str, timeout: float = 60.0):
    """Send an authenticated GET request to the Airflow REST API.

    The base URL and Basic-auth credentials are read from the
    FLOWHUB_API_URL, FLOWHUB_API_USER and FLOWHUB_API_PASS environment
    variables. Every failure (missing variables, HTTP error, unreachable
    server, broken or non-JSON response) is reported on stderr and ends the
    process with exit status 1.

    Args:
        path: API path appended to the base URL (e.g. /api/v1/dags/...)
        timeout: seconds a blocking socket operation may take before the
            request is abandoned

    Returns:
        parsed JSON response body
    """
    required_vars = ['FLOWHUB_API_URL', 'FLOWHUB_API_USER', 'FLOWHUB_API_PASS']
    missing = [v for v in required_vars if v not in os.environ]
    if missing:
        print(f"Error: missing environment variables: {', '.join(missing)}", file=sys.stderr)
        print("These should be injected via Airflow Connection (FLOWHUB_API_URL, FLOWHUB_API_USER, FLOWHUB_API_PASS)", file=sys.stderr)
        sys.exit(1)

    auth = base64.b64encode(
        (os.environ['FLOWHUB_API_USER'] + ':' + os.environ['FLOWHUB_API_PASS']).encode()
    ).decode()

    url = os.environ['FLOWHUB_API_URL'].rstrip('/') + path

    req = urllib.request.Request(
        url,
        headers={
            'Authorization': 'Basic ' + auth,
            'Accept': 'application/json',
        },
    )

    try:
        # The context manager closes the connection even if read() fails;
        # the timeout keeps a stalled server from hanging the task forever.
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            payload = resp.read()
    except urllib.error.HTTPError as e:
        if e.code == 401:
            print(f"Error: authentication failed for {url}", file=sys.stderr)
            print("Check FLOWHUB_API_USER / FLOWHUB_API_PASS in Airflow Connection", file=sys.stderr)
        elif e.code == 404:
            print(f"Error: resource not found: {url}", file=sys.stderr)
        else:
            print(f"Error: HTTP {e.code} from {url}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"Error: cannot reach Airflow API at {os.environ['FLOWHUB_API_URL']}: {e.reason}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Timeouts and connection drops while reading the body are plain
        # OSErrors, not URLErrors.
        print(f"Error: failed to read response from {url}: {e}", file=sys.stderr)
        sys.exit(1)

    try:
        return json.loads(payload)
    except ValueError:
        # Covers json.JSONDecodeError and undecodable bytes.
        print(f"Error: invalid JSON response from {url}", file=sys.stderr)
        sys.exit(1)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _write_json(content, output_path: str):
    """Write content to a file as UTF-8 text, creating parent directories.

    Args:
        content: a str is written verbatim; any other value is serialized
            with json.dump
        output_path: destination file path
    """
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    # Explicit UTF-8 so non-ASCII string payloads do not depend on the
    # container's locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        if isinstance(content, str):
            f.write(content)
        else:
            json.dump(content, f)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def fetch_xcom(dag_id: str, run_id: str, task_id: str, output_path: str):
    """Download a task's return_value XCom entry and store it in a file.

    Each identifier is percent-encoded (no characters left unescaped)
    before it is placed in the request path.

    Args:
        dag_id: Airflow DAG ID
        run_id: DAG Run ID
        task_id: upstream task ID
        output_path: path to save the JSON output
    """
    segments = [
        '/api/v1/dags/', quote(dag_id, safe=''),
        '/dagRuns/', quote(run_id, safe=''),
        '/taskInstances/', quote(task_id, safe=''),
        '/xcomEntries/return_value',
    ]
    response = _api_request(''.join(segments))
    _write_json(response['value'], output_path)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def fetch_params(dag_id: str, run_id: str, output_path: str):
    """Download a DAG run's conf and store it in a file.

    The 'conf' field of the DAG run response is written out; an empty
    object is written when the response has no such field.

    Args:
        dag_id: Airflow DAG ID
        run_id: DAG Run ID
        output_path: path to save the JSON output
    """
    segments = [
        '/api/v1/dags/', quote(dag_id, safe=''),
        '/dagRuns/', quote(run_id, safe=''),
    ]
    response = _api_request(''.join(segments))
    conf = response.get('conf', {})
    _write_json(conf, output_path)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
if __name__ == '__main__':
    # Command-line entry point: the first argument selects the sub-command,
    # the remaining ones are passed positionally to the matching fetcher.
    argv = sys.argv
    argc = len(argv)

    if argc < 2:
        for line in (
            "Usage:",
            " python3 -m codeflowhub.airflow.xcom xcom <dag_id> <run_id> <task_id> <output_path>",
            " python3 -m codeflowhub.airflow.xcom params <dag_id> <run_id> <output_path>",
        ):
            print(line, file=sys.stderr)
        sys.exit(1)

    command = argv[1]

    # Reject unknown sub-commands first so the branches below only deal
    # with argument counts.
    if command not in ('xcom', 'params'):
        print(f"Unknown command: {command}", file=sys.stderr)
        print("Available commands: xcom, params", file=sys.stderr)
        sys.exit(1)

    if command == 'xcom':
        if argc != 6:
            print("Usage: python3 -m codeflowhub.airflow.xcom xcom <dag_id> <run_id> <task_id> <output_path>", file=sys.stderr)
            sys.exit(1)
        fetch_xcom(*argv[2:6])
    else:
        if argc != 5:
            print("Usage: python3 -m codeflowhub.airflow.xcom params <dag_id> <run_id> <output_path>", file=sys.stderr)
            sys.exit(1)
        fetch_params(*argv[2:5])
|
|
@@ -81,7 +81,7 @@ class FlowDecorator(BaseDecorator):
|
|
|
81
81
|
def _create_parser(cls, add_help=True):
|
|
82
82
|
"""FlowhHub CLI 파서 생성"""
|
|
83
83
|
parser = argparse.ArgumentParser(description='FlowhHub Workflow', add_help=add_help)
|
|
84
|
-
parser.add_argument('--env', type=str, default=
|
|
84
|
+
parser.add_argument('--env', type=str, default=None, help='Environment to use (default: default, or --export value if exporting)')
|
|
85
85
|
parser.add_argument('--id', type=str, default=None, help='Workflow run ID (default: run)')
|
|
86
86
|
parser.add_argument('--export', type=str, default=None, help='Export to external system (airflow)')
|
|
87
87
|
parser.add_argument('--job', type=str, default=None, help='Job directory path (auto-sets --input-data and --run-log)')
|
|
@@ -186,6 +186,10 @@ class FlowDecorator(BaseDecorator):
|
|
|
186
186
|
parser = self._create_parser(add_help=False)
|
|
187
187
|
args, _ = parser.parse_known_args()
|
|
188
188
|
|
|
189
|
+
# --env 미지정 시: --export가 있으면 export 값, 없으면 'default'
|
|
190
|
+
if args.env is None:
|
|
191
|
+
args.env = args.export if args.export and args.export in self.env_config else 'default'
|
|
192
|
+
|
|
189
193
|
if args.env and args.env in self.env_config:
|
|
190
194
|
self.select_env(args.env)
|
|
191
195
|
print(f'Using environment: {args.env}')
|
|
@@ -296,9 +296,9 @@ base_volume_mounts = [
|
|
|
296
296
|
# failure handler는 retries=0 설정
|
|
297
297
|
retries_code = "\n retries=0," if trigger_rule == 'one_failed' else ""
|
|
298
298
|
|
|
299
|
-
# XCom API fetch를 사용하는 task에 env_vars 주입
|
|
299
|
+
# XCom API fetch를 사용하는 task에 env_vars 주입 (is_first: params fetch, depend: xcom fetch)
|
|
300
300
|
env_vars_code = ""
|
|
301
|
-
if self.airflow_connection_id and
|
|
301
|
+
if self.airflow_connection_id and (is_first or task.depend):
|
|
302
302
|
env_vars_code = "\n env_vars=_fh_api_env,"
|
|
303
303
|
|
|
304
304
|
operator_code = f''' {task.name} = KubernetesPodOperator(
|
|
@@ -315,6 +315,12 @@ base_volume_mounts = [
|
|
|
315
315
|
def _generate_input_commands(self, task, is_first):
|
|
316
316
|
"""Input 데이터 명령어 생성"""
|
|
317
317
|
if is_first:
|
|
318
|
+
if self.airflow_connection_id:
|
|
319
|
+
return (
|
|
320
|
+
f'python3 -m codeflowhub.airflow.xcom params '
|
|
321
|
+
f'{{{{{{{{ dag.dag_id }}}}}}}} {{{{{{{{ dag_run.run_id }}}}}}}} '
|
|
322
|
+
f'/app/input/0.json'
|
|
323
|
+
)
|
|
318
324
|
return '\n'.join(["cat << 'EOF' > /app/input/0.json", '{{{{ params | tojson }}}}', 'EOF'])
|
|
319
325
|
if not task.depend:
|
|
320
326
|
return "echo '{{}}' >> /app/input/0.json"
|
|
@@ -322,7 +328,7 @@ base_volume_mounts = [
|
|
|
322
328
|
if self.airflow_connection_id:
|
|
323
329
|
# XCom API fetch 방식 (ARG_MAX safe)
|
|
324
330
|
commands = [
|
|
325
|
-
f'python3 -m codeflowhub.airflow.xcom '
|
|
331
|
+
f'python3 -m codeflowhub.airflow.xcom xcom '
|
|
326
332
|
f'{{{{{{{{ dag.dag_id }}}}}}}} {{{{{{{{ dag_run.run_id }}}}}}}} '
|
|
327
333
|
f'{dep.name} /app/input/{i}.json'
|
|
328
334
|
for i, dep in enumerate(task.depend)
|
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
"""Airflow XCom REST API client for fetching task outputs at runtime.
|
|
2
|
-
|
|
3
|
-
Used inside KubernetesPodOperator containers to fetch upstream task xcom data
|
|
4
|
-
via Airflow REST API instead of inlining it in shell arguments (ARG_MAX safe).
|
|
5
|
-
|
|
6
|
-
Environment variables (injected via Airflow Connection):
|
|
7
|
-
- FLOWHUB_API_URL: Airflow webserver URL (e.g. http://airflow-web:8080)
|
|
8
|
-
- FLOWHUB_API_USER: Basic auth user
|
|
9
|
-
- FLOWHUB_API_PASS: Basic auth password
|
|
10
|
-
|
|
11
|
-
Usage:
|
|
12
|
-
python3 -m codeflowhub.airflow.xcom <dag_id> <run_id> <task_id> <output_path>
|
|
13
|
-
"""
|
|
14
|
-
import urllib.request
|
|
15
|
-
import json
|
|
16
|
-
import base64
|
|
17
|
-
import os
|
|
18
|
-
import sys
|
|
19
|
-
from urllib.parse import quote
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def fetch_xcom(dag_id: str, run_id: str, task_id: str, output_path: str):
    """Fetch xcom value from Airflow REST API and save to file.

    The base URL and Basic-auth credentials are read from the
    FLOWHUB_API_URL, FLOWHUB_API_USER and FLOWHUB_API_PASS environment
    variables. Missing variables, HTTP errors and an unreachable server are
    reported on stderr and end the process with exit status 1.

    Args:
        dag_id: Airflow DAG ID
        run_id: DAG Run ID
        task_id: upstream task ID
        output_path: path to save the JSON output
    """
    required_vars = ['FLOWHUB_API_URL', 'FLOWHUB_API_USER', 'FLOWHUB_API_PASS']
    missing = [v for v in required_vars if v not in os.environ]
    if missing:
        print(f"Error: missing environment variables: {', '.join(missing)}", file=sys.stderr)
        print("These should be injected via Airflow Connection (FLOWHUB_API_URL, FLOWHUB_API_USER, FLOWHUB_API_PASS)", file=sys.stderr)
        sys.exit(1)

    auth = base64.b64encode(
        (os.environ['FLOWHUB_API_USER'] + ':' + os.environ['FLOWHUB_API_PASS']).encode()
    ).decode()

    url = (
        os.environ['FLOWHUB_API_URL'].rstrip('/')
        + '/api/v1/dags/' + quote(dag_id, safe='')
        + '/dagRuns/' + quote(run_id, safe='')
        + '/taskInstances/' + quote(task_id, safe='')
        + '/xcomEntries/return_value'
    )

    req = urllib.request.Request(
        url,
        headers={
            'Authorization': 'Basic ' + auth,
            'Accept': 'application/json',
        },
    )

    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(req) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        if e.code == 401:
            print(f"Error: authentication failed for {url}", file=sys.stderr)
            print("Check FLOWHUB_API_USER / FLOWHUB_API_PASS in Airflow Connection", file=sys.stderr)
        elif e.code == 404:
            print(f"Error: xcom not found for task '{task_id}' in dag '{dag_id}' run '{run_id}'", file=sys.stderr)
            print("Ensure the upstream task completed successfully and pushed xcom", file=sys.stderr)
        else:
            print(f"Error: HTTP {e.code} from {url}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"Error: cannot reach Airflow API at {os.environ['FLOWHUB_API_URL']}: {e.reason}", file=sys.stderr)
        sys.exit(1)

    body = json.loads(raw)
    value = body['value']

    # value can be a string (already JSON) or a parsed object
    content = value if isinstance(value, str) else json.dumps(value)

    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    # Explicit UTF-8 so non-ASCII payloads do not depend on the locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
if __name__ == '__main__':
    # Command-line entry point: exactly four positional arguments are
    # required and forwarded to fetch_xcom in order.
    cli_args = sys.argv[1:]
    if len(cli_args) != 4:
        print("Usage: python3 -m codeflowhub.airflow.xcom <dag_id> <run_id> <task_id> <output_path>", file=sys.stderr)
        sys.exit(1)

    fetch_xcom(*cli_args)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{codeflowhub-0.2.0 → codeflowhub-0.2.2}/codeflowhub/template/analyze_speaker_pkg/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|