dataflow-core 2.1.6__tar.gz → 2.1.18rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dataflow-core might be problematic. Click here for more details.

Files changed (82) hide show
  1. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/PKG-INFO +6 -1
  2. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/authenticator/dataflowhubauthenticator.py +47 -60
  3. dataflow_core-2.1.18rc2/dataflow/dataflow.py +423 -0
  4. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/environment.py +62 -33
  5. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/__init__.py +9 -4
  6. dataflow_core-2.1.18rc2/dataflow/models/app_types.py +15 -0
  7. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/connection.py +5 -4
  8. dataflow_core-2.1.18rc2/dataflow/models/dataflow_zone.py +18 -0
  9. dataflow_core-2.1.18rc2/dataflow/models/environment.py +125 -0
  10. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/git_ssh.py +2 -1
  11. dataflow_core-2.1.18rc2/dataflow/models/org_associations.py +38 -0
  12. dataflow_core-2.1.18rc2/dataflow/models/organization.py +78 -0
  13. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/pinned_projects.py +2 -2
  14. dataflow_core-2.1.18rc2/dataflow/models/pod_activity.py +17 -0
  15. dataflow_core-2.1.18rc2/dataflow/models/pod_session_history.py +16 -0
  16. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/project_details.py +9 -6
  17. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/recent_project_studio.py +1 -1
  18. dataflow_core-2.1.18rc2/dataflow/models/role.py +35 -0
  19. dataflow_core-2.1.18rc2/dataflow/models/role_server.py +11 -0
  20. dataflow_core-2.1.18rc2/dataflow/models/role_zone.py +22 -0
  21. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/server_config.py +16 -8
  22. dataflow_core-2.1.18rc2/dataflow/models/team.py +23 -0
  23. dataflow_core-2.1.18rc2/dataflow/models/user.py +68 -0
  24. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/user_team.py +1 -4
  25. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/variables.py +13 -4
  26. dataflow_core-2.1.18rc2/dataflow/schemas/connection.py +87 -0
  27. dataflow_core-2.1.18rc2/dataflow/schemas/git_ssh.py +50 -0
  28. dataflow_core-2.1.18rc2/dataflow/schemas/secret.py +47 -0
  29. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/scripts/clone_environment.sh +2 -1
  30. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/scripts/create_environment.sh +4 -0
  31. dataflow_core-2.1.18rc2/dataflow/secrets_manager/__init__.py +13 -0
  32. dataflow_core-2.1.18rc2/dataflow/secrets_manager/factory.py +65 -0
  33. dataflow_core-2.1.18rc2/dataflow/secrets_manager/interface.py +22 -0
  34. dataflow_core-2.1.18rc2/dataflow/secrets_manager/providers/__init__.py +0 -0
  35. dataflow_core-2.1.18rc2/dataflow/secrets_manager/providers/aws_manager.py +164 -0
  36. dataflow_core-2.1.18rc2/dataflow/secrets_manager/providers/azure_manager.py +185 -0
  37. dataflow_core-2.1.18rc2/dataflow/secrets_manager/providers/gcp_manager.py +332 -0
  38. dataflow_core-2.1.18rc2/dataflow/secrets_manager/service.py +164 -0
  39. dataflow_core-2.1.18rc2/dataflow/secrets_manager/utils.py +58 -0
  40. dataflow_core-2.1.18rc2/dataflow/utils/__init__.py +0 -0
  41. dataflow_core-2.1.18rc2/dataflow/utils/exceptions.py +112 -0
  42. dataflow_core-2.1.18rc2/dataflow/utils/get_current_user.py +60 -0
  43. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/PKG-INFO +6 -1
  44. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/SOURCES.txt +25 -5
  45. dataflow_core-2.1.18rc2/dataflow_core.egg-info/requires.txt +10 -0
  46. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/top_level.txt +1 -0
  47. dataflow_core-2.1.18rc2/dfmigration/__init__.py +0 -0
  48. dataflow_core-2.1.18rc2/dfmigration/env.py +45 -0
  49. dataflow_core-2.1.18rc2/dfmigration/versions/001_initial_baseline_migration.py +20 -0
  50. dataflow_core-2.1.18rc2/dfmigration/versions/__init__.py +0 -0
  51. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/setup.py +8 -3
  52. dataflow_core-2.1.6/dataflow/dataflow.py +0 -199
  53. dataflow_core-2.1.6/dataflow/models/app_types.py +0 -10
  54. dataflow_core-2.1.6/dataflow/models/environment.py +0 -75
  55. dataflow_core-2.1.6/dataflow/models/role.py +0 -19
  56. dataflow_core-2.1.6/dataflow/models/role_server.py +0 -14
  57. dataflow_core-2.1.6/dataflow/models/runtime.py +0 -11
  58. dataflow_core-2.1.6/dataflow/models/team.py +0 -17
  59. dataflow_core-2.1.6/dataflow/models/user.py +0 -30
  60. dataflow_core-2.1.6/dataflow/models/user_environment.py +0 -16
  61. dataflow_core-2.1.6/dataflow/utils/aws_secrets_manager.py +0 -57
  62. dataflow_core-2.1.6/dataflow/utils/get_current_user.py +0 -35
  63. dataflow_core-2.1.6/dataflow/utils/json_handler.py +0 -33
  64. dataflow_core-2.1.6/dataflow_core.egg-info/requires.txt +0 -5
  65. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/README.md +0 -0
  66. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/authenticator/__init__.py +0 -0
  67. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/authenticator/dataflowairflowauthenticator.py +0 -0
  68. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/authenticator/dataflowsupersetauthenticator.py +0 -0
  69. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/__init__.py +0 -0
  70. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/configuration.py +0 -0
  71. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/database_manager.py +0 -0
  72. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/db.py +0 -0
  73. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/blacklist_library.py +0 -0
  74. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/environment_status.py +0 -0
  75. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/recent_projects.py +0 -0
  76. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/session.py +0 -0
  77. {dataflow_core-2.1.6/dataflow/utils → dataflow_core-2.1.18rc2/dataflow/schemas}/__init__.py +0 -0
  78. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/scripts/update_environment.sh +0 -0
  79. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/utils/logger.py +0 -0
  80. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/dependency_links.txt +0 -0
  81. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/entry_points.txt +0 -0
  82. {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/setup.cfg +0 -0
@@ -1,14 +1,19 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataflow-core
3
- Version: 2.1.6
3
+ Version: 2.1.18rc2
4
4
  Summary: Dataflow core package
5
5
  Author: Dataflow
6
6
  Author-email:
7
7
  Requires-Dist: sqlalchemy
8
+ Requires-Dist: alembic
8
9
  Requires-Dist: boto3
9
10
  Requires-Dist: psycopg2-binary
10
11
  Requires-Dist: pymysql
11
12
  Requires-Dist: requests
13
+ Requires-Dist: azure-identity
14
+ Requires-Dist: azure-keyvault-secrets
15
+ Requires-Dist: google-auth
16
+ Requires-Dist: google-cloud-secret-manager
12
17
  Dynamic: author
13
18
  Dynamic: requires-dist
14
19
  Dynamic: summary
@@ -1,6 +1,4 @@
1
- import os
2
- import uuid
3
- import re
1
+ import os, uuid, re, hashlib, secrets
4
2
  from datetime import datetime, timedelta
5
3
  from zoneinfo import ZoneInfo
6
4
  from traitlets import Bool, Unicode
@@ -8,7 +6,8 @@ from jupyterhub.auth import Authenticator
8
6
  from oauthenticator.google import GoogleOAuthenticator
9
7
  from oauthenticator.azuread import AzureAdOAuthenticator
10
8
  from dataflow.db import get_db
11
- from dataflow.models import user as m_user, session as m_session, role as m_role
9
+ from dataflow.models import user as m_user, session as m_session
10
+ from sqlalchemy import or_
12
11
 
13
12
  class DataflowBaseAuthenticator(Authenticator):
14
13
  enable_dataflow_auth = Bool(True, config=True, help="Enable username/password authentication")
@@ -28,7 +27,7 @@ class DataflowBaseAuthenticator(Authenticator):
28
27
  return str(uuid.uuid4())
29
28
 
30
29
  def set_session_cookie(self, handler, session_id):
31
- expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=365)
30
+ expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=60)
32
31
  host = handler.request.host
33
32
  domain = '.'.join(host.split('.')[-2:]) if len(host.split('.')) >= 2 else host
34
33
  handler.set_cookie(
@@ -44,19 +43,12 @@ class DataflowBaseAuthenticator(Authenticator):
44
43
  self.log.info(f"Set session cookie: dataflow_session={session_id} for host={host}")
45
44
 
46
45
  def get_or_create_session(self, user_id):
47
- existing_session = (
48
- self.db.query(m_session.Session)
49
- .filter(m_session.Session.user_id == str(user_id))
50
- .first()
51
- )
52
- if existing_session:
53
- self.log.info(f"Reusing existing session: {existing_session.session_id}")
54
- return existing_session.session_id
55
46
  session_id = self.generate_session_id()
56
47
  while self.db.query(m_session.Session).filter(
57
48
  m_session.Session.session_id == session_id
58
49
  ).first():
59
50
  session_id = self.generate_session_id()
51
+
60
52
  db_item = m_session.Session(user_id=user_id, session_id=session_id)
61
53
  self.db.add(db_item)
62
54
  self.db.commit()
@@ -77,42 +69,28 @@ class DataflowBaseAuthenticator(Authenticator):
77
69
 
78
70
  return super().check_blocked_users(username, authenticated)
79
71
 
80
- def get_applicant_role_id(self):
81
- """Get the role ID for 'Applicant' role"""
82
- try:
83
- applicant_role = (
84
- self.db.query(m_role.Role)
85
- .filter(m_role.Role.name == "Applicant")
86
- .first()
87
- )
88
- if applicant_role:
89
- return applicant_role.id
90
- else:
91
- self.log.warning("Applicant role not found in database")
92
- return None
93
- except Exception as e:
94
- self.log.error(f"Error getting Applicant role: {str(e)}")
95
- return None
96
-
97
72
  def extract_username_from_email(self, email):
98
73
  """Extract username from email by removing domain"""
99
74
  if '@' in email:
100
75
  return email.split('@')[0]
101
76
  return email
102
77
 
78
def generate_secure_password(self):
    """Return a random 64-character lowercase hex string to use as a password value.

    The token is 32 bytes read from the ``secrets`` CSPRNG. Hashing a UUID
    together with a salt adds no entropy over reading random bytes directly,
    so the value is produced in a single step; its shape (64 hex characters)
    matches a SHA-256 hex digest.

    Returns:
        str: A freshly generated, unpredictable hex token. The result is a
        random value, not a hash of any user-supplied password.
    """
    return secrets.token_hex(32)
84
+
103
85
  def create_new_user(self, email, first_name=None, last_name=None):
104
86
  """Create a new user with Applicant role"""
105
87
  try:
106
- role_id = self.get_applicant_role_id()
107
- if not role_id:
108
- self.log.error("Cannot create user: Applicant role not found")
109
- return None
110
-
111
88
  username = self.extract_username_from_email(email)
112
- username = re.sub(r'[^A-Za-z0-9]', '', username)
89
+ username = re.sub(r'[^a-z0-9]', '', username.lower())
113
90
  if not username:
114
91
  self.log.error("Cannot create user: Username is empty")
115
92
  return None
93
+
116
94
  existing_user = (
117
95
  self.db.query(m_user.User)
118
96
  .filter(m_user.User.user_name == username)
@@ -122,7 +100,7 @@ class DataflowBaseAuthenticator(Authenticator):
122
100
  counter = 1
123
101
  original_username = username
124
102
  while existing_user:
125
- username = f"{original_username}_{counter}"
103
+ username = f"{original_username}{counter}"
126
104
  existing_user = (
127
105
  self.db.query(m_user.User)
128
106
  .filter(m_user.User.user_name == username)
@@ -130,14 +108,13 @@ class DataflowBaseAuthenticator(Authenticator):
130
108
  )
131
109
  counter += 1
132
110
 
111
+ secure_password = self.generate_secure_password()
133
112
  new_user = m_user.User(
134
113
  user_name=username,
135
114
  first_name=first_name or username,
136
115
  last_name=last_name or "",
137
116
  email=email,
138
- role_id=role_id,
139
- active='Y',
140
- password='user@123',
117
+ password=secure_password,
141
118
  )
142
119
 
143
120
  self.db.add(new_user)
@@ -155,22 +132,29 @@ class DataflowBaseAuthenticator(Authenticator):
155
132
  async def authenticate_dataflow(self, handler, data):
156
133
  if not (self.enable_dataflow_auth and isinstance(data, dict) and data.get("username") and data.get("password")):
157
134
  return None
158
- username = data["username"]
135
+ user_name_or_email = data["username"]
159
136
  password = data["password"]
160
- self.log.info(f"Attempting Dataflow authentication for user: {username}")
137
+ self.log.info(f"Attempting Dataflow authentication for user: {user_name_or_email}")
161
138
  try:
162
139
  user = (
163
140
  self.db.query(m_user.User)
164
- .filter(m_user.User.user_name == username)
141
+ .filter(
142
+ or_(
143
+ m_user.User.email == user_name_or_email,
144
+ m_user.User.user_name == user_name_or_email
145
+ )
146
+ )
165
147
  .first()
166
148
  )
149
+
167
150
  if not user or user.password != password:
168
- self.log.warning(f"Dataflow authentication failed for user: {username}")
151
+ self.log.warning(f"Dataflow authentication failed for user: {user_name_or_email}")
169
152
  return None
153
+
170
154
  session_id = self.get_or_create_session(user.user_id)
171
155
  self.set_session_cookie(handler, session_id)
172
- self.log.info(f"Dataflow authentication successful for user: {username}")
173
- return {"name": username, "session_id": session_id, "auth_state": {}}
156
+ self.log.info(f"Dataflow authentication successful for user: {user.user_name}")
157
+ return {"name": user.user_name, "session_id": session_id, "auth_state": {}}
174
158
  except Exception as e:
175
159
  self.log.error(f"Dataflow authentication error: {str(e)}")
176
160
  return None
@@ -248,7 +232,11 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticato
248
232
  azure_client_secret = Unicode(config=True, help="Azure AD OAuth client secret")
249
233
  azure_tenant_id = Unicode(config=True, help="Azure AD tenant ID")
250
234
  azure_scope = Unicode("openid profile email", config=True, help="Azure AD OAuth scopes")
251
-
235
+ dataflow_oauth_type = Unicode(
236
+ default_value="google",
237
+ config=True,
238
+ help="The OAuth provider type for DataflowHub (e.g., github, google)"
239
+ )
252
240
  def __init__(self, **kwargs):
253
241
  super().__init__(**kwargs)
254
242
  self.client_id = self.azure_client_id
@@ -270,32 +258,30 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticato
270
258
  if not user:
271
259
  self.log.warning("Azure AD OAuth authentication failed: No user data returned")
272
260
  return None
273
-
274
- email = user.get("email") or user.get("preferred_username")
261
+
262
+ auth_state = user.get("auth_state", {})
263
+ user_info = auth_state.get("user", {}) if auth_state else {}
264
+ email = user_info.get("upn")
275
265
  if not email:
276
- self.log.warning("Azure AD OAuth authentication failed: No email in user data")
266
+ self.log.warning("Azure AD OAuth authentication failed: No upn in user data")
277
267
  return None
278
-
268
+
279
269
  db_user = (
280
270
  self.db.query(m_user.User)
281
271
  .filter(m_user.User.email == email)
282
272
  .first()
283
273
  )
284
-
274
+
285
275
  if not db_user:
286
276
  self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
287
- # Extract additional info from user data if available
288
- auth_state = user.get("auth_state", {})
289
- user_info = auth_state.get("user", {}) if auth_state else {}
290
277
 
291
- first_name = user_info.get("given_name") or user.get("given_name")
292
- last_name = user_info.get("family_name") or user.get("family_name")
278
+ first_name = user_info.get("name") or user.get("name")
293
279
 
294
- db_user = self.create_new_user(email, first_name, last_name)
280
+ db_user = self.create_new_user(email, first_name, last_name=None)
295
281
  if not db_user:
296
282
  self.log.error(f"Failed to create new user for email: {email}")
297
283
  return None
298
-
284
+
299
285
  username = db_user.user_name
300
286
  session_id = self.get_or_create_session(db_user.user_id)
301
287
  self.set_session_cookie(handler, session_id)
@@ -305,13 +291,14 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticato
305
291
  "session_id": session_id,
306
292
  "auth_state": user.get("auth_state", {})
307
293
  }
294
+
308
295
  except Exception as e:
309
296
  self.log.error(f"Azure AD OAuth authentication error: {str(e)}", exc_info=True)
310
297
  return None
311
298
  finally:
312
299
  self.db.close()
313
300
 
314
- auth_type = os.environ.get("DATAFLOW_AUTH_TYPE", "google")
301
+ auth_type = os.environ.get("DATAFLOW_OAUTH_TYPE", "google")
315
302
 
316
303
  if auth_type == "google":
317
304
  BaseAuthenticator = DataflowGoogleAuthenticator
@@ -0,0 +1,423 @@
1
+ import os, requests
2
+ from .database_manager import DatabaseManager
3
+ import json
4
+ import base64
5
+ from .configuration import ConfigurationManager
6
+
7
+
8
+ class Dataflow:
9
+ """
10
+ Dataflow class to interact with Dataflow services.
11
+ """
12
+
13
+ @staticmethod
14
+ def _json_parse(value):
15
+ try:
16
+ result = json.loads(value)
17
+ if isinstance(result, str):
18
+ try:
19
+ return json.loads(result)
20
+ except json.JSONDecodeError:
21
+ return result
22
+ return result
23
+ except (json.JSONDecodeError, TypeError):
24
+ return value
25
+
26
+ def _parse_response_data(self, response):
27
+ """Parse response data based on datatype field or fallback to JSON parsing."""
28
+ data = response.json()
29
+ if not isinstance(data, dict):
30
+ raise ValueError("Internal Dataflow Error!")
31
+ value = data.get('value', '')
32
+ if data.get('datatype') == 'json':
33
+ return self._json_parse(value)
34
+ else:
35
+ return value
36
+
37
def auth(self, session_id: str, timeout: float = 30.0):
    """
    Retrieve and return user information using their session ID.

    The auth endpoint is read from the 'ui_auth_api' key of the 'auth'
    section in /dataflow/app/auth_config/dataflow_auth.cfg and is called
    with the session ID as the 'dataflow_session' cookie.

    Args:
        session_id (str): User's session ID from cookies
        timeout (float): Seconds to wait for the auth service before giving
            up. Without a timeout a stalled service would block the caller
            indefinitely.

    Returns:
        dict: On HTTP 200, a dict with the keys user_name, first_name,
        last_name (empty string when absent), email and role (taken from
        the response's 'base_role' field). On any other status, the decoded
        JSON body of the response. If anything raises (including a
        timeout), the exception object itself is returned rather than
        raised.
    """
    try:
        dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
        auth_api = dataflow_config.get_config_value('auth', 'ui_auth_api')
        response = requests.get(
            auth_api,
            cookies={"dataflow_session": session_id, "jupyterhub-hub-login": ""},
            timeout=timeout,
        )

        if response.status_code != 200:
            return response.json()

        user_data = response.json()
        return {
            "user_name": user_data["user_name"],
            "first_name": user_data["first_name"],
            "last_name": user_data["last_name"] if user_data.get("last_name") else "",
            "email": user_data["email"],
            "role": user_data["base_role"],
        }

    except Exception as e:
        # NOTE(review): returning the exception instead of raising is the
        # established contract of this method; kept for backward compatibility.
        return e
70
+
71
def variable(self, variable_name: str, timeout: float = 30.0):
    """
    Retrieve a Dataflow variable.

    The endpoint is selected from the process environment: when both
    RUNTIME and SLUG are set, the 'variable_ui_api' endpoint is queried
    with runtime, slug and org_id (from ORGANIZATION) parameters;
    otherwise, when HOSTNAME is set, 'variable_manager_api' is used.

    Args:
        variable_name (str): Name of the variable to retrieve
        timeout (float): Seconds to wait for the variable service before
            giving up, so a stalled service cannot block the caller forever.

    Returns:
        The value produced by ``_parse_response_data`` on HTTP 200. None
        when the variable is not found (404), on any other non-200 status
        below 500, when no endpoint is configured, or when a
        non-request error occurs (the error is printed).

    Raises:
        RuntimeError: If the HTTP request fails (connection error, timeout,
            or a 5xx response).
    """
    try:
        host_name = os.environ.get("HOSTNAME", "")
        runtime = os.environ.get("RUNTIME")
        slug = os.environ.get("SLUG")
        org_id = os.environ.get("ORGANIZATION")

        dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
        query_params = {
            "key": variable_name,
        }

        variable_api = None
        if runtime and slug:
            variable_api = dataflow_config.get_config_value("auth", "variable_ui_api")
            query_params["runtime"] = runtime
            query_params["slug"] = slug
            query_params["org_id"] = org_id
        elif host_name:
            variable_api = dataflow_config.get_config_value("auth", "variable_manager_api")
        else:
            raise Exception("Cannot run dataflow methods here!")

        if not variable_api:
            print("[Dataflow.variable] Variable Unreachable")
            return None

        response = requests.get(variable_api, params=query_params, timeout=timeout)

        if response.status_code == 404:
            return None
        elif response.status_code >= 500:
            # Raises requests.HTTPError, converted to RuntimeError below.
            response.raise_for_status()
        elif response.status_code >= 400:
            print(f"[Dataflow.variable] Client error {response.status_code} for variable '{variable_name}'")
            return None
        elif response.status_code != 200:
            print(f"[Dataflow.variable] Unexpected status {response.status_code} for variable '{variable_name}'")
            return None

        return self._parse_response_data(response)

    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"[Dataflow.variable] Failed to fetch variable '{variable_name}'") from e

    except Exception as e:
        print(f"[Dataflow.variable] Exception occurred: {e}")
        return None
128
+
129
def secret(self, secret_name: str, timeout: float = 30.0):
    """
    Retrieve a Dataflow secret value.

    When RUNTIME is set, the 'secret_ui_api' endpoint is queried with
    runtime, slug (from SLUG) and org_id (from ORGANIZATION) parameters;
    otherwise 'secret_manager_api' is used.

    Args:
        secret_name (str): Name of the secret to retrieve
        timeout (float): Seconds to wait for the secret service before
            giving up, so a stalled service cannot block the caller forever.

    Returns:
        The value produced by ``_parse_response_data`` on HTTP 200. None
        when the secret is not found (404), on any other non-200 status
        below 500, when no endpoint is configured, or when a
        non-request error occurs (the error is printed).

    Raises:
        RuntimeError: If the HTTP request fails (connection error, timeout,
            or a 5xx response).
    """
    try:
        runtime = os.environ.get("RUNTIME")
        slug = os.environ.get("SLUG")
        org_id = os.environ.get("ORGANIZATION")

        dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
        query_params = {
            "key": secret_name
        }

        if runtime:
            secret_api = dataflow_config.get_config_value("auth", "secret_ui_api")
            query_params["runtime"] = runtime
            query_params["slug"] = slug
            query_params["org_id"] = org_id
        else:
            secret_api = dataflow_config.get_config_value("auth", "secret_manager_api")

        if not secret_api:
            print("[Dataflow.secret] Secret API Unreachable")
            return None

        response = requests.get(secret_api, params=query_params, timeout=timeout)

        if response.status_code == 404:
            return None
        elif response.status_code >= 500:
            # Raises requests.HTTPError, converted to RuntimeError below.
            response.raise_for_status()
        elif response.status_code >= 400:
            print(f"[Dataflow.secret] Client error {response.status_code} for secret '{secret_name}'")
            return None
        elif response.status_code != 200:
            print(f"[Dataflow.secret] Unexpected status {response.status_code} for secret '{secret_name}'")
            return None

        return self._parse_response_data(response)

    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"[Dataflow.secret] Failed to fetch secret '{secret_name}'") from e
    except Exception as e:
        print(f"[Dataflow.secret] Exception occurred: {e}")
        return None
181
+
182
def secret_file(self, secret_name: str, timeout: float = 30.0):
    """
    Retrieve a Dataflow secret file.

    When RUNTIME is set, the 'secret_ui_api' endpoint is queried and the
    returned content is written to /tmp/secrets/<filename>; otherwise
    'secret_manager_api' is queried and the raw value is returned.

    Args:
        secret_name (str): Name of the secret to retrieve
        timeout (float): Seconds to wait for the secret service before
            giving up, so a stalled service cannot block the caller forever.

    Returns:
        str or None: In runtime mode, the path of the file that was
        written. In non-runtime mode, the response's 'value' field as-is.
        None when the secret is not found, is not a file secret, has no
        content or an unusable filename, on a non-200 status below 500, or
        when a non-request error occurs (the error is printed).

    Raises:
        RuntimeError: If the HTTP request fails (connection error, timeout,
            or a 5xx response).
    """
    from pathlib import Path

    try:
        runtime = os.environ.get("RUNTIME")
        slug = os.environ.get("SLUG")
        org_id = os.environ.get("ORGANIZATION")

        dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
        query_params = {
            "key": secret_name
        }

        if runtime:
            secret_api = dataflow_config.get_config_value("auth", "secret_ui_api")
            query_params["runtime"] = runtime
            query_params["slug"] = slug
            query_params["org_id"] = org_id
        else:
            secret_api = dataflow_config.get_config_value("auth", "secret_manager_api")

        if not secret_api:
            print("[Dataflow.secret] Secret API Unreachable")
            return None

        response = requests.get(secret_api, params=query_params, timeout=timeout)

        if response.status_code == 404:
            return None
        elif response.status_code >= 500:
            # Raises requests.HTTPError, converted to RuntimeError below.
            response.raise_for_status()
        elif response.status_code >= 400:
            print(f"[Dataflow.secret] Client error {response.status_code} for secret '{secret_name}'")
            return None
        elif response.status_code != 200:
            print(f"[Dataflow.secret] Unexpected status {response.status_code} for secret '{secret_name}'")
            return None

        response_data = response.json()
        filename = response_data.get('filename')
        if not filename:
            print(f"[Dataflow.secret] No file found for secret '{secret_name}'! If it is a non-file secret, please use the 'secret' method.")
            return None

        if not runtime:
            # For non-runtime mode, return the value as-is
            return response_data.get('value')

        file_content = response_data.get('value')
        if not file_content:
            print(f"[Dataflow.secret] Missing filename or content for secret '{secret_name}'")
            return None

        # The filename comes from the service response. Keep only its final
        # path component so values such as "../../etc/cron.d/x" or an
        # absolute path cannot write outside the secrets directory.
        safe_name = Path(filename).name
        if safe_name in ("", ".", ".."):
            print(f"[Dataflow.secret] Missing filename or content for secret '{secret_name}'")
            return None

        # NOTE(review): the directory and file are created with default
        # permissions under /tmp; consider restricting them for secrets.
        secrets_dir = Path("/tmp/secrets")
        secrets_dir.mkdir(parents=True, exist_ok=True)
        file_path = secrets_dir / safe_name

        # Detect whether the content is Base64-encoded. binascii.Error is a
        # ValueError subclass; TypeError covers non-string content.
        try:
            decoded_content = base64.b64decode(file_content, validate=True)
        except (ValueError, TypeError):
            decoded_content = None

        # Non-printable bytes in the first 100 bytes mark the payload as binary.
        is_binary = decoded_content is not None and not all(
            32 <= byte <= 126 or byte in (9, 10, 13)
            for byte in decoded_content[:100]
        )

        if is_binary:
            with open(file_path, 'wb') as f:
                f.write(decoded_content)
        else:
            text = file_content
            if decoded_content is not None:
                try:
                    text = decoded_content.decode('utf-8')
                except UnicodeDecodeError:
                    # Decoded bytes are not valid text; store the raw value.
                    text = file_content
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(text)

        return str(file_path)

    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"[Dataflow.secret] Failed to fetch secret '{secret_name}'") from e
    except Exception as e:
        print(f"[Dataflow.secret] Exception occurred: {e}")
        return None
280
+
281
def connection(self, conn_id: str, mode="session", timeout: float = 30.0):
    """
    Connects with a Dataflow connection.

    When RUNTIME is set, the 'connection_ui_api' endpoint is queried with
    runtime, org_id (from ORGANIZATION) and slug (from SLUG) parameters;
    otherwise, when HOSTNAME is set, 'connection_manager_api' is used.

    Args:
        conn_id (str): Connection identifier
        mode (str): Return type - "session" (default), "engine", "url" or
            "dict"
        timeout (float): Seconds to wait for the connection service before
            giving up, so a stalled service cannot block the caller forever.

    Returns:
        For "session", the first object yielded by
        ``DatabaseManager.get_session()``; for "engine", the result of
        ``DatabaseManager.get_engine()``; for "url", the connection string;
        for "dict", the raw connection details as a dict.

    Raises:
        RuntimeError: If the request fails, the connection is not found,
            the mode is unsupported, or any other error occurs while
            building the connection.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus, urlencode

    try:
        # Reject a bad mode before any network call or engine creation.
        if mode not in ("session", "engine", "url", "dict"):
            raise ValueError(f"Unsupported mode: {mode}. Use 'session', 'engine', 'url', 'dict'.")

        # .get() rather than indexing: HOSTNAME may be absent when RUNTIME
        # is set, and the explicit error below must stay reachable.
        host_name = os.environ.get("HOSTNAME", "")
        runtime = os.environ.get("RUNTIME")
        slug = os.environ.get("SLUG")
        org_id = os.environ.get("ORGANIZATION")

        dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
        query_params = {
            "conn_id": conn_id
        }

        if runtime:
            query_params["runtime"] = runtime
            query_params["org_id"] = org_id
            query_params["slug"] = slug
            connection_api = dataflow_config.get_config_value("auth", "connection_ui_api")
        elif host_name:
            connection_api = dataflow_config.get_config_value("auth", "connection_manager_api")
        else:
            raise Exception("Cannot run dataflow methods here! HOSTNAME or RUNTIME env variable not set.")

        response = requests.get(connection_api, params=query_params, timeout=timeout)

        if response.status_code == 404:
            raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")
        elif response.status_code >= 500:
            # Raises requests.HTTPError, converted to RuntimeError below.
            response.raise_for_status()
        elif response.status_code >= 400:
            raise RuntimeError(f"[Dataflow.connection] Client error {response.status_code} for connection '{conn_id}'")
        elif response.status_code != 200:
            raise RuntimeError(f"[Dataflow.connection] Unexpected status {response.status_code} for connection '{conn_id}'")

        connection_details = response.json()

        if not connection_details:
            raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")

        if mode == "dict":
            return dict(connection_details)

        conn_type = connection_details['conn_type'].lower()
        username = connection_details['login']
        password = connection_details.get('password', '')
        host = connection_details['host']
        port = connection_details['port']
        database = connection_details.get('schemas', '')

        # Percent-encode credentials so characters such as '@', ':' or '/'
        # cannot be misread as URL delimiters.
        # NOTE(review): assumes stored credentials are raw, not already
        # percent-encoded - confirm against the connection service.
        user_info = quote_plus(str(username))
        if password:
            user_info += f":{quote_plus(str(password))}"
        db_info = f"/{database}" if database else ""

        connection_string = f"{conn_type}://{user_info}@{host}:{port}{db_info}"

        extra = connection_details.get('extra', '')
        if extra:
            try:
                extra_params = json.loads(extra)
            except json.JSONDecodeError:
                # If 'extra' is not valid JSON, skip adding extra parameters
                extra_params = None
            if extra_params:
                # urlencode escapes keys and values for the query string.
                connection_string += f"?{urlencode(extra_params)}"

        if mode == "url":
            return connection_string

        connection_instance = DatabaseManager(connection_string)
        if mode == "engine":
            return connection_instance.get_engine()
        return next(connection_instance.get_session())

    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"[Dataflow.connection] Failed to fetch connection '{conn_id}'") from e

    except Exception as e:
        raise RuntimeError(f"[Dataflow.connection] Error connecting to '{conn_id}': {str(e)}") from e
371
+
372
+ def variable_or_secret(self, key: str):
373
+ """
374
+ Retrieve a variable or secret by key.
375
+
376
+ Args:
377
+ key (str): Key of the variable or secret
378
+
379
+ Returns:
380
+ str or None: Value if found, None otherwise
381
+ """
382
+ try:
383
+ host_name = os.environ.get("HOSTNAME", "")
384
+ runtime = os.environ.get("RUNTIME")
385
+ slug = os.environ.get("SLUG")
386
+ org_id = os.environ.get("ORGANIZATION")
387
+
388
+ dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
389
+ query_params = {
390
+ "key": key
391
+ }
392
+
393
+ if runtime:
394
+ variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_ui_api")
395
+ query_params["runtime"] = runtime
396
+ query_params["slug"] = slug
397
+ query_params["org_id"] = org_id
398
+ elif host_name:
399
+ variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_manager_api")
400
+ else:
401
+ raise Exception("Cannot run dataflow methods here!")
402
+
403
+ if not variableorsecret_api:
404
+ print("[Dataflow.variable_or_secret] Variable/Secret Unreachable")
405
+ return None
406
+
407
+ response = requests.get(variableorsecret_api, params=query_params)
408
+
409
+ if response.status_code == 404:
410
+ return None
411
+ elif response.status_code >= 500:
412
+ response.raise_for_status() # Let server errors propagate
413
+ elif response.status_code >= 400:
414
+ print(f"[Dataflow.variable_or_secret] Client error {response.status_code} for key '{key}'")
415
+ return None
416
+ elif response.status_code != 200:
417
+ print(f"[Dataflow.variable_or_secret] Unexpected status {response.status_code} for key '{key}'")
418
+ return None
419
+
420
+ return self._parse_response_data(response)
421
+
422
+ except requests.exceptions.RequestException as e:
423
+ raise RuntimeError(f"[Dataflow.variable_or_secret] Failed to fetch '{key}'") from e