dataflow-core 2.1.15rc1.tar.gz → 2.1.15rc3.tar.gz

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of dataflow-core might be problematic.

Files changed (76)
  1. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/PKG-INFO +2 -1
  2. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/authenticator/dataflowhubauthenticator.py +19 -41
  3. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/dataflow.py +45 -69
  4. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/environment.py +20 -11
  5. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/__init__.py +5 -3
  6. dataflow_core-2.1.15rc3/dataflow/models/app_types.py +15 -0
  7. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/connection.py +5 -4
  8. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/dataflow_zone.py +2 -3
  9. dataflow_core-2.1.15rc3/dataflow/models/environment.py +125 -0
  10. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/git_ssh.py +2 -1
  11. dataflow_core-2.1.15rc3/dataflow/models/org_associations.py +38 -0
  12. dataflow_core-2.1.15rc3/dataflow/models/organization.py +78 -0
  13. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/pinned_projects.py +2 -2
  14. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/project_details.py +9 -6
  15. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/recent_project_studio.py +1 -1
  16. dataflow_core-2.1.15rc3/dataflow/models/role.py +35 -0
  17. dataflow_core-2.1.15rc3/dataflow/models/role_server.py +11 -0
  18. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/role_zone.py +8 -3
  19. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/server_config.py +9 -5
  20. dataflow_core-2.1.15rc3/dataflow/models/team.py +23 -0
  21. dataflow_core-2.1.15rc3/dataflow/models/user.py +68 -0
  22. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/user_team.py +1 -4
  23. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/variables.py +6 -4
  24. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/secrets_manager/service.py +11 -9
  25. dataflow_core-2.1.15rc3/dataflow/utils/get_current_user.py +60 -0
  26. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow_core.egg-info/PKG-INFO +2 -1
  27. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow_core.egg-info/SOURCES.txt +7 -2
  28. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow_core.egg-info/requires.txt +1 -0
  29. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow_core.egg-info/top_level.txt +1 -0
  30. dataflow_core-2.1.15rc3/dfmigration/__init__.py +0 -0
  31. dataflow_core-2.1.15rc3/dfmigration/env.py +45 -0
  32. dataflow_core-2.1.15rc3/dfmigration/versions/001_initial_baseline_migration.py +20 -0
  33. dataflow_core-2.1.15rc3/dfmigration/versions/__init__.py +0 -0
  34. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/setup.py +3 -2
  35. dataflow_core-2.1.15rc1/dataflow/models/app_types.py +0 -10
  36. dataflow_core-2.1.15rc1/dataflow/models/environment.py +0 -82
  37. dataflow_core-2.1.15rc1/dataflow/models/role.py +0 -29
  38. dataflow_core-2.1.15rc1/dataflow/models/role_server.py +0 -14
  39. dataflow_core-2.1.15rc1/dataflow/models/team.py +0 -17
  40. dataflow_core-2.1.15rc1/dataflow/models/user.py +0 -31
  41. dataflow_core-2.1.15rc1/dataflow/models/user_environment.py +0 -16
  42. dataflow_core-2.1.15rc1/dataflow/utils/get_current_user.py +0 -37
  43. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/README.md +0 -0
  44. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/authenticator/__init__.py +0 -0
  45. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/authenticator/dataflowairflowauthenticator.py +0 -0
  46. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/authenticator/dataflowsupersetauthenticator.py +0 -0
  47. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/__init__.py +0 -0
  48. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/configuration.py +0 -0
  49. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/database_manager.py +0 -0
  50. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/db.py +0 -0
  51. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/blacklist_library.py +0 -0
  52. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/environment_status.py +0 -0
  53. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/pod_activity.py +0 -0
  54. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/pod_session_history.py +0 -0
  55. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/recent_projects.py +0 -0
  56. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/models/session.py +0 -0
  57. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/schemas/__init__.py +0 -0
  58. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/schemas/connection.py +0 -0
  59. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/schemas/git_ssh.py +0 -0
  60. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/schemas/secret.py +0 -0
  61. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/scripts/clone_environment.sh +0 -0
  62. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/scripts/create_environment.sh +0 -0
  63. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/scripts/update_environment.sh +0 -0
  64. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/secrets_manager/__init__.py +0 -0
  65. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/secrets_manager/factory.py +0 -0
  66. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/secrets_manager/interface.py +0 -0
  67. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/secrets_manager/providers/__init__.py +0 -0
  68. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/secrets_manager/providers/aws_manager.py +0 -0
  69. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/secrets_manager/providers/azure_manager.py +0 -0
  70. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/secrets_manager/providers/gcp_manager.py +0 -0
  71. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/utils/__init__.py +0 -0
  72. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/utils/exceptions.py +0 -0
  73. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow/utils/logger.py +0 -0
  74. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow_core.egg-info/dependency_links.txt +0 -0
  75. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/dataflow_core.egg-info/entry_points.txt +0 -0
  76. {dataflow_core-2.1.15rc1 → dataflow_core-2.1.15rc3}/setup.cfg +0 -0
@@ -1,10 +1,11 @@
 Metadata-Version: 2.4
 Name: dataflow-core
-Version: 2.1.15rc1
+Version: 2.1.15rc3
 Summary: Dataflow core package
 Author: Dataflow
 Author-email:
 Requires-Dist: sqlalchemy
+Requires-Dist: alembic
 Requires-Dist: boto3
 Requires-Dist: psycopg2-binary
 Requires-Dist: pymysql
@@ -8,7 +8,8 @@ from jupyterhub.auth import Authenticator
 from oauthenticator.google import GoogleOAuthenticator
 from oauthenticator.azuread import AzureAdOAuthenticator
 from dataflow.db import get_db
-from dataflow.models import user as m_user, session as m_session, role as m_role
+from dataflow.models import user as m_user, session as m_session
+from sqlalchemy import or_
 
 class DataflowBaseAuthenticator(Authenticator):
     enable_dataflow_auth = Bool(True, config=True, help="Enable username/password authentication")
@@ -28,7 +29,7 @@ class DataflowBaseAuthenticator(Authenticator):
         return str(uuid.uuid4())
 
     def set_session_cookie(self, handler, session_id):
-        expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=365)
+        expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=60)
         host = handler.request.host
         domain = '.'.join(host.split('.')[-2:]) if len(host.split('.')) >= 2 else host
         handler.set_cookie(
@@ -44,19 +45,12 @@ class DataflowBaseAuthenticator(Authenticator):
         self.log.info(f"Set session cookie: dataflow_session={session_id} for host={host}")
 
     def get_or_create_session(self, user_id):
-        existing_session = (
-            self.db.query(m_session.Session)
-            .filter(m_session.Session.user_id == str(user_id))
-            .first()
-        )
-        if existing_session:
-            self.log.info(f"Reusing existing session: {existing_session.session_id}")
-            return existing_session.session_id
         session_id = self.generate_session_id()
         while self.db.query(m_session.Session).filter(
             m_session.Session.session_id == session_id
         ).first():
             session_id = self.generate_session_id()
+
         db_item = m_session.Session(user_id=user_id, session_id=session_id)
         self.db.add(db_item)
         self.db.commit()
@@ -77,23 +71,6 @@ class DataflowBaseAuthenticator(Authenticator):
 
         return super().check_blocked_users(username, authenticated)
 
-    def get_applicant_role_id(self):
-        """Get the role ID for 'Applicant' role"""
-        try:
-            applicant_role = (
-                self.db.query(m_role.Role)
-                .filter(m_role.Role.name == "Applicant")
-                .first()
-            )
-            if applicant_role:
-                return applicant_role.id
-            else:
-                self.log.warning("Applicant role not found in database")
-                return None
-        except Exception as e:
-            self.log.error(f"Error getting Applicant role: {str(e)}")
-            return None
-
     def extract_username_from_email(self, email):
         """Extract username from email by removing domain"""
         if '@' in email:
@@ -103,16 +80,12 @@ class DataflowBaseAuthenticator(Authenticator):
     def create_new_user(self, email, first_name=None, last_name=None):
         """Create a new user with Applicant role"""
         try:
-            role_id = self.get_applicant_role_id()
-            if not role_id:
-                self.log.error("Cannot create user: Applicant role not found")
-                return None
-
             username = self.extract_username_from_email(email)
             username = re.sub(r'[^a-z0-9]', '', username.lower())
             if not username:
                 self.log.error("Cannot create user: Username is empty")
                 return None
+
             existing_user = (
                 self.db.query(m_user.User)
                 .filter(m_user.User.user_name == username)
@@ -122,7 +95,7 @@ class DataflowBaseAuthenticator(Authenticator):
             counter = 1
             original_username = username
             while existing_user:
-                username = f"{original_username}_{counter}"
+                username = f"{original_username}{counter}"
                 existing_user = (
                     self.db.query(m_user.User)
                     .filter(m_user.User.user_name == username)
@@ -135,8 +108,6 @@ class DataflowBaseAuthenticator(Authenticator):
                 first_name=first_name or username,
                 last_name=last_name or "",
                 email=email,
-                role_id=role_id,
-                password='user@123',
             )
 
             self.db.add(new_user)
@@ -154,22 +125,29 @@ class DataflowBaseAuthenticator(Authenticator):
     async def authenticate_dataflow(self, handler, data):
         if not (self.enable_dataflow_auth and isinstance(data, dict) and data.get("username") and data.get("password")):
             return None
-        username = data["username"]
+        user_name_or_email = data["username"]
         password = data["password"]
-        self.log.info(f"Attempting Dataflow authentication for user: {username}")
+        self.log.info(f"Attempting Dataflow authentication for user: {user_name_or_email}")
         try:
             user = (
                 self.db.query(m_user.User)
-                .filter(m_user.User.user_name == username)
+                .filter(
+                    or_(
+                        m_user.User.email == user_name_or_email,
+                        m_user.User.user_name == user_name_or_email
+                    )
+                )
                 .first()
             )
+
             if not user or user.password != password:
-                self.log.warning(f"Dataflow authentication failed for user: {username}")
+                self.log.warning(f"Dataflow authentication failed for user: {user_name_or_email}")
                 return None
+
             session_id = self.get_or_create_session(user.user_id)
             self.set_session_cookie(handler, session_id)
-            self.log.info(f"Dataflow authentication successful for user: {username}")
-            return {"name": username, "session_id": session_id, "auth_state": {}}
+            self.log.info(f"Dataflow authentication successful for user: {user.user_name}")
+            return {"name": user.user_name, "session_id": session_id, "auth_state": {}}
         except Exception as e:
             self.log.error(f"Dataflow authentication error: {str(e)}")
             return None
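
As an illustration (not code from the package): the net effect of the rc3 authenticator change is that the JupyterHub login form now accepts either a user name or an email address. A minimal sketch of the same lookup outside the authenticator, assuming `db` is a SQLAlchemy session bound to the dataflow models; the helper name `find_user` is hypothetical:

    from sqlalchemy import or_
    from dataflow.models import user as m_user

    def find_user(db, user_name_or_email):
        """Match a login string against either user_name or email, as authenticate_dataflow now does."""
        return (
            db.query(m_user.User)
            .filter(
                or_(
                    m_user.User.email == user_name_or_email,
                    m_user.User.user_name == user_name_or_email,
                )
            )
            .first()
        )
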
@@ -25,16 +25,13 @@ class Dataflow:
     def _parse_response_data(self, response):
         """Parse response data based on datatype field or fallback to JSON parsing."""
         data = response.json()
+        if not isinstance(data, dict):
+            raise ValueError("Internal Dataflow Error!")
         value = data.get('value', '')
-        if isinstance(data, dict) and 'datatype' in data:
-            value = data.get('value', '')
-            datatype = data.get('datatype')
-            if datatype == 'json':
-                return self._json_parse(value)
-            else:
-                return value
-        else:
+        if data.get('datatype') == 'json':
             return self._json_parse(value)
+        else:
+            return value
 
     def auth(self, session_id: str):
         """
@@ -84,12 +81,19 @@ class Dataflow:
         host_name = os.environ.get("HOSTNAME", "")
         runtime = os.environ.get("RUNTIME")
         slug = os.environ.get("SLUG")
+        org_id = os.environ.get("ORGANIZATION")
 
         dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
+        query_params = {
+            "key": variable_name,
+        }
 
         variable_api = None
         if runtime and slug:
             variable_api = dataflow_config.get_config_value("auth", "variable_ui_api")
+            query_params["runtime"] = runtime
+            query_params["slug"] = slug
+            query_params["org_id"] = org_id
         elif host_name:
             variable_api = dataflow_config.get_config_value("auth", "variable_manager_api")
         else:
@@ -98,36 +102,13 @@ class Dataflow:
         if not variable_api:
             print("[Dataflow.variable] Variable Unreachable")
             return None
-
-        if runtime:
-            query_params = {
-                "key": variable_name,
-                "runtime": runtime,
-                "slug": slug
-            }
-            response = requests.get(variable_api, params=query_params)
-            if response.status_code == 200:
-                response_text = response.text.strip().strip('"')
-                return response_text
-
-            query_params["slug"] = "global"
-            response = requests.get(variable_api, params=query_params)
-            if response.status_code == 200:
-                response_text = response.text.strip().strip('"')
-                return response_text
-            else:
-                return None
-
-        query_params = {
-            "key": variable_name,
-        }
+
         response = requests.get(variable_api, params=query_params)
 
-        # Handle different HTTP status codes gracefully
         if response.status_code == 404:
-            return None  # Variable not found
+            return None
         elif response.status_code >= 500:
-            response.raise_for_status()  # Let server errors propagate
+            response.raise_for_status()
         elif response.status_code >= 400:
             print(f"[Dataflow.variable] Client error {response.status_code} for variable '{variable_name}'")
             return None
@@ -158,32 +139,30 @@ class Dataflow:
         host_name = os.environ.get("HOSTNAME", "")
         runtime = os.environ.get("RUNTIME")
         slug = os.environ.get("SLUG")
+        org_id = os.environ.get("ORGANIZATION")
 
         dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
-        if runtime:
-            secret_api = dataflow_config.get_config_value("auth", "secret_ui_api")
-        else:
-            secret_api = dataflow_config.get_config_value("auth", "secret_manager_api")
-        if not secret_api:
-            print("[Dataflow.secret] Secret API Unreachable")
-            return None
-
         query_params = {
             "key": secret_name
         }
 
         if runtime:
+            secret_api = dataflow_config.get_config_value("auth", "secret_ui_api")
             query_params["runtime"] = runtime
-        if slug:
             query_params["slug"] = slug
+            query_params["org_id"] = org_id
+        else:
+            secret_api = dataflow_config.get_config_value("auth", "secret_manager_api")
+            if not secret_api:
+                print("[Dataflow.secret] Secret API Unreachable")
+                return None
 
         response = requests.get(secret_api, params=query_params)
-
-        # Handle different HTTP status codes gracefully
+
         if response.status_code == 404:
-            return None  # Secret not found
+            return None
         elif response.status_code >= 500:
-            response.raise_for_status()  # Let server errors propagate
+            response.raise_for_status()
         elif response.status_code >= 400:
             print(f"[Dataflow.secret] Client error {response.status_code} for secret '{secret_name}'")
             return None
@@ -214,31 +193,29 @@ class Dataflow:
         host_name = os.environ["HOSTNAME"]
         runtime = os.environ.get("RUNTIME")
         slug = os.environ.get("SLUG")
-
+        org_id = os.environ.get("ORGANIZATION")
+
         dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
-        if runtime:
-            connection_api = dataflow_config.get_config_value("auth", "connection_ui_api")
-        elif host_name:
-            connection_api = dataflow_config.get_config_value("auth", "connection_manager_api")
-        else:
-            raise Exception("Cannot run dataflow methods here! HOSTNAME or RUNTIME env variable not set.")
-
         query_params = {
             "conn_id": conn_id
         }
 
         if runtime:
             query_params["runtime"] = runtime
-        if slug:
+            query_params["org_id"] = org_id
             query_params["slug"] = slug
+            connection_api = dataflow_config.get_config_value("auth", "connection_ui_api")
+        elif host_name:
+            connection_api = dataflow_config.get_config_value("auth", "connection_manager_api")
+        else:
+            raise Exception("Cannot run dataflow methods here! HOSTNAME or RUNTIME env variable not set.")
 
         response = requests.get(connection_api, params=query_params)
 
-        # Handle different HTTP status codes gracefully
         if response.status_code == 404:
             raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")
         elif response.status_code >= 500:
-            response.raise_for_status()  # Let server errors propagate
+            response.raise_for_status()
         elif response.status_code >= 400:
             raise RuntimeError(f"[Dataflow.connection] Client error {response.status_code} for connection '{conn_id}'")
         elif response.status_code != 200:
@@ -306,20 +283,20 @@ class Dataflow:
         host_name = os.environ.get("HOSTNAME", "")
         runtime = os.environ.get("RUNTIME")
         slug = os.environ.get("SLUG")
+        org_id = os.environ.get("ORGANIZATION")
 
         dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
-        if runtime and slug:
-            variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_ui_api")
-            query_params = {
-                "key": key,
-                "runtime": runtime,
-                "slug": slug
+        query_params = {
+            "key": key
         }
+
+        if runtime:
+            variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_ui_api")
+            query_params["runtime"] = runtime
+            query_params["slug"] = slug
+            query_params["org_id"] = org_id
         elif host_name:
             variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_manager_api")
-            query_params = {
-                "key": key
-            }
         else:
             raise Exception("Cannot run dataflow methods here!")
 
@@ -329,9 +306,8 @@ class Dataflow:
 
         response = requests.get(variableorsecret_api, params=query_params)
 
-        # Handle different HTTP status codes gracefully
         if response.status_code == 404:
-            return None  # Variable/secret not found
+            return None
         elif response.status_code >= 500:
             response.raise_for_status()  # Let server errors propagate
         elif response.status_code >= 400:
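
As an illustration (not code from the package): the variable, secret, connection, and variable-or-secret lookups above now all read an ORGANIZATION environment variable and forward it as an `org_id` query parameter when RUNTIME is set. A rough usage sketch; the import path, the no-argument constructor, and every environment value are assumptions, not confirmed by this diff:

    import os
    from dataflow.dataflow import Dataflow  # assumed import path for the Dataflow class

    # Illustrative runtime environment; real values are provisioned by the platform.
    os.environ["RUNTIME"] = "airflow"
    os.environ["SLUG"] = "demo-project"
    os.environ["ORGANIZATION"] = "42"   # forwarded as the org_id query parameter

    df = Dataflow()                      # assumes a no-argument constructor
    value = df.variable("MY_VARIABLE")   # returns None on 404, raises on 5xx
    secret = df.secret("MY_SECRET")
    conn = df.connection("postgres_default")
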
@@ -6,12 +6,14 @@ from .configuration import ConfigurationManager
 from .utils.logger import CustomLogger
 
 class EnvironmentManager:
-    def __init__(self):
+    def __init__(self, org_id: int = None):
         """Initialize the EnvironmentManager"""
         self.config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
-        self.env_base_path = self.config.get_config_value('paths', 'env_path')
-        self.env_logs_path = self.config.get_config_value('paths', 'env_logs_path')
-        self.env_version_path = self.config.get_config_value('paths', 'env_versions_path')
+        self.org_id = org_id
+        self.env_sub_path = f"{self.org_id}" if self.org_id is not None else "dataflow"
+        self.env_base_path = os.path.join(self.config.get_config_value('paths', 'env_path'), self.env_sub_path, 'python_envs')
+        self.env_logs_path = os.path.join(self.config.get_config_value('paths', 'env_logs_path'), self.env_sub_path, 'logs')
+        self.env_version_path = os.path.join(self.config.get_config_value('paths', 'env_versions_path'), self.env_sub_path, 'versions')
         self.local_env_logs_path = self.config.get_config_value('paths', 'local_env_logs_path')
         os.makedirs(self.env_version_path, exist_ok=True)
         self.logger = CustomLogger().get_logger(__name__)
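
As an illustration (not code from the package): with the new `org_id` parameter, every path the EnvironmentManager touches gains an organization-specific segment, falling back to a shared "dataflow" segment when no org is given. A small worked example of the path construction, using an illustrative base path in place of the value read from dataflow.cfg:

    import os

    env_path = "/dataflow/envs"   # illustrative; the real value comes from the 'paths' section of dataflow.cfg
    org_id = 42

    # Mirrors the sub-path logic in EnvironmentManager.__init__ in rc3.
    env_sub_path = f"{org_id}" if org_id is not None else "dataflow"
    env_base_path = os.path.join(env_path, env_sub_path, "python_envs")

    print(env_base_path)  # -> /dataflow/envs/42/python_envs
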
@@ -334,10 +336,9 @@ class EnvironmentManager:
         """
         versioned_name = f"{env_name}_v{env_version}"
         log_file_name = f"envlog_{versioned_name}.log"
-        log_file_dir = self.config.get_config_value('paths', 'env_logs_path')
-        os.makedirs(log_file_dir, exist_ok=True)
-        log_file_location = os.path.join(log_file_dir, log_file_name)
-
+        os.makedirs(self.env_logs_path, exist_ok=True)
+        log_file_location = os.path.join(self.env_logs_path, log_file_name)
+
         # Clear log file if it exists
         if os.path.exists(log_file_location):
             open(log_file_location, "w").close()
@@ -394,8 +395,14 @@ class EnvironmentManager:
         Returns:
             JobLogs: The created or updated job entry
         """
-        job = db.query(JobLogs).filter(JobLogs.log_file_name == log_file_name).first()
-
+        job = (
+            db.query(JobLogs)
+            .filter(
+                JobLogs.log_file_name == log_file_name,
+                JobLogs.org_id == self.org_id if self.org_id is not None else JobLogs.org_id.is_(None)
+            )
+            .first()
+        )
         if job:
             if job.status == "success":
                 self.logger.error(f"Job with log_file_name '{log_file_name}' already completed successfully.")
@@ -409,6 +416,7 @@ class EnvironmentManager:
                 log_file_name=log_file_name,
                 log_file_location=log_file_location,
                 created_by=user_name,
+                org_id=self.org_id if self.org_id else None,
                 status="in_progress"
             )
             db.add(job)
@@ -464,7 +472,8 @@ class EnvironmentManager:
         env_status = "Draft" if status == "success" else "Failed"
 
         db.query(Environment).filter(
-            Environment.short_name == env_short_name
+            Environment.short_name == env_short_name,
+            Environment.org_id == self.org_id if self.org_id is not None else Environment.org_id.is_(None)
         ).update({"version": version, "pip_libraries": pip_libraries, "conda_libraries": conda_libraries, "status": env_status})
         db.commit()
 
@@ -1,9 +1,9 @@
 # init for loading models in the application
 
 from .role import Role
-from .user import User
+from .user import User, UserOnboarding, OnboardingStatus
 from .team import Team
-from .environment import (Environment, LocalEnvironment, ArchivedEnvironment, JobLogs)
+from .environment import (Environment, LocalEnvironment, ArchivedEnvironment, JobLogs, PipSource)
 from .project_details import ProjectDetails
 from .recent_projects import RecentProjects
 from .pinned_projects import PinnedProject
@@ -22,4 +22,6 @@ from .recent_project_studio import RecentProjectStudio
 from .connection import Connection
 from .git_ssh import GitSSH
 from .pod_activity import PodActivity
-from .pod_session_history import PodSessionHistory
+from .pod_session_history import PodSessionHistory
+from .organization import Organization, OrganizationOnboarding
+from .org_associations import OrganizationServer, OrganizationUser, OrganizationAppType
@@ -0,0 +1,15 @@
+from sqlalchemy import Column, Integer, String, Boolean
+from sqlalchemy.orm import relationship
+from dataflow.db import Base
+
+class AppType(Base):
+    __tablename__ = "APP_TYPE"
+
+    id = Column(Integer, primary_key=True, autoincrement=True, unique=True)
+    name = Column(String, unique=True, nullable=False)
+    display_name = Column(String, nullable=False)
+    code_based = Column(Boolean, nullable=False)
+    studio = Column(Boolean, nullable=False, default=False, server_default='false')
+    runtime = Column(Boolean, nullable=False, default=False, server_default='false')
+
+    organizations = relationship("Organization", secondary="ORGANIZATION_APP_TYPE", back_populates="apps")
@@ -1,4 +1,4 @@
-from sqlalchemy import Column, String, Integer, Boolean, DateTime, UniqueConstraint
+from sqlalchemy import Column, String, Integer, Boolean, DateTime, UniqueConstraint, ForeignKey
 from sqlalchemy.sql import func
 from dataflow.db import Base
 
@@ -10,16 +10,17 @@ class Connection(Base):
 
     id = Column(Integer, primary_key=True, index=True)
     conn_id = Column(String, index=True, nullable=False)
+    org_id = Column(Integer, ForeignKey("ORGANIZATION.id"), index=True, nullable=False)
     description = Column(String, nullable=True)
     conn_type = Column(String, nullable=False)
     runtime = Column(String, nullable=True)
    slug = Column(String, nullable=True)
-    status = Column(Boolean, default=False)
+    status = Column(Boolean, default=False, server_default='false')
     created_by = Column(String, nullable=True)
     created_at = Column(DateTime(timezone=True), server_default=func.now())
     updated_at = Column(DateTime(timezone=True), onupdate=func.now())
-    is_active = Column(Boolean, default=True)
+    is_active = Column(Boolean, default=True, server_default='true')
 
     __table_args__ = (
-        UniqueConstraint('conn_id', 'runtime', 'slug', 'is_active', 'created_by', name='uq_active_conn_with_runtime_slug'),
+        UniqueConstraint('conn_id', 'org_id', 'runtime', 'slug', 'is_active', 'created_by', name='uq_active_conn_with_runtime_slug'),
     )
@@ -8,10 +8,9 @@ class DataflowZone(Base):
     id = Column(Integer, primary_key=True, autoincrement=True)
     slug = Column(String, unique=True, nullable=False)
     display_name = Column(String, nullable=False)
-    is_runtime = Column(Boolean, default=False)
+    is_runtime = Column(Boolean, default=False, server_default='false')
     subdomain = Column(String)
-    spark_enabled = Column(Boolean, default=False)
-    display_order = Column(Integer, default=0)
+    display_order = Column(Integer, default=0, server_default='0')
 
     role_zone_assocs = relationship("RoleZone", back_populates="zone")
 
@@ -0,0 +1,125 @@
+from sqlalchemy import (
+    Column, Integer, String, Boolean, Text,
+    ForeignKey, DateTime, UniqueConstraint, CheckConstraint
+)
+from sqlalchemy.orm import relationship, Session
+from sqlalchemy.sql import func
+from datetime import datetime, timezone
+from dataflow.db import Base
+from enum import Enum
+
+class EnvironmentAttributes(Base):
+    """
+    Shared columns between Environment and ArchivedEnvironment.
+    """
+    __abstract__ = True
+
+    name = Column(String, nullable=False)
+    url = Column(String)
+    enabled = Column(Boolean, default=True, server_default='true')
+    version = Column(String, default=0, server_default='0')
+    is_latest = Column(Boolean, default=True, server_default='true')
+    base_env_id = Column(Integer, default=None)
+    short_name = Column(String(5))
+    status = Column(String, default="Saved", server_default="Saved")
+    icon = Column(String)
+    py_version = Column(String)
+    r_version = Column(String)
+    pip_libraries = Column(Text)
+    conda_libraries = Column(Text)
+    r_requirements = Column(Text)
+    created_date = Column(DateTime, server_default=func.now())
+    created_by = Column(String)
+    org_id = Column(Integer, ForeignKey('ORGANIZATION.id'))
+
+class Environment(EnvironmentAttributes):
+    __tablename__ = 'ENVIRONMENT'
+    __table_args__ = (UniqueConstraint('short_name', 'org_id', name='_env_short_name_org_uc'),)
+    id = Column(Integer, primary_key=True, autoincrement=True)
+
+    # Relationships
+    organization = relationship("Organization", back_populates="environments")
+    archived_versions = relationship("ArchivedEnvironment", back_populates="original_environment")
+
+class ArchivedEnvironment(EnvironmentAttributes):
+    __tablename__ = 'ARCHIVED_ENVIRONMENT'
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id', ondelete='CASCADE'))
+
+    # Relationship with Environment
+    original_environment = relationship("Environment", back_populates="archived_versions")
+
+class JobLogs(Base):
+    __tablename__ = "JOB_LOG"
+    __table_args__ = (UniqueConstraint('log_file_name', 'org_id', name='_job_log_file_org_uc'),)
+
+    id = Column(Integer, primary_key=True, index=True)
+    created_at = Column(DateTime, default=datetime.now, server_default=func.now())
+    completed_at = Column(DateTime, nullable=True)
+    log_file_name = Column(String, nullable=False)
+    log_file_location = Column(String, nullable=False)
+    status = Column(String)
+    created_by = Column(String)
+    org_id = Column(Integer, ForeignKey('ORGANIZATION.id', ondelete='CASCADE'))
+
+
+class LocalEnvironment(Base):
+    __tablename__ = "LOCAL_ENVIRONMENT"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    name = Column(String, nullable=False, index=True)
+    user_name = Column(String, ForeignKey('USER.user_name', ondelete='CASCADE'), nullable=False, index=True)
+    org_id = Column(Integer, ForeignKey('ORGANIZATION.id', ondelete='CASCADE'), nullable=False, index=True)
+    py_version = Column(String)
+    pip_libraries = Column(Text)
+    conda_libraries = Column(Text)
+    status = Column(String, default="Created", server_default="Created")
+    cloned_from = Column(String, nullable=True)
+    updated_at = Column(DateTime, default=datetime.now(timezone.utc), onupdate=datetime.now(timezone.utc))
+    need_refresh = Column(Boolean, default=False, server_default='false')
+
+class EnvType(str, Enum):
+    dataflow = "dataflow"
+    local = "local"
+
+class PipSource(Base):
+    __tablename__ = "PIP_SOURCE"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+
+    org_id = Column(Integer, ForeignKey("ORGANIZATION.id", ondelete="CASCADE"), nullable=False, index=True)
+    user_name = Column(String, ForeignKey("USER.user_name", ondelete="CASCADE"), nullable=True, index=True)
+
+    name = Column(String, nullable=False)
+    url = Column(String, nullable=False)
+    is_index = Column(Boolean, default=False, nullable=False, server_default='false')
+
+    created_at = Column(DateTime, default=datetime.now(timezone.utc), nullable=False)
+    updated_at = Column(DateTime, default=datetime.now(timezone.utc), onupdate=datetime.now(timezone.utc), nullable=False)
+
+    __table_args__ = (
+        UniqueConstraint("org_id", "name", "user_name", name="uq_pip_source_per_user_org"),
+        CheckConstraint("NOT (is_index = TRUE AND user_name IS NOT NULL)", name="check_no_user_index_url"),
+    )
+
+    @classmethod
+    def get_org_sources(cls, session: Session, org_id: int):
+        """
+        Returns all sources for the given org (org-level).
+        """
+        return session.query(cls).filter(
+            cls.org_id == org_id,
+            cls.user_name == None
+        ).all()
+
+    @classmethod
+    def get_user_sources(cls, session: Session, org_id: int, user_name: str):
+        """
+        Returns merged sources for a user in an org (org-level + user-level personal sources).
+        """
+        return session.query(cls).filter(
+            cls.org_id == org_id,
+            ((cls.user_name == None) | (cls.user_name == user_name))
+        ).all()
+
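
As an illustration (not code from the package): the two PipSource classmethods encode the org/user scoping rules for extra pip sources. A minimal usage sketch; the connection string, org id, and user name are placeholders, and it assumes the dataflow-core tables already exist in the target database:

    from sqlalchemy import create_engine
    from sqlalchemy.orm import Session
    from dataflow.models import PipSource   # re-exported by dataflow/models/__init__.py in rc3

    engine = create_engine("postgresql+psycopg2://user:pass@localhost/dataflow")  # placeholder URL

    with Session(engine) as session:
        org_sources = PipSource.get_org_sources(session, org_id=42)               # org-wide sources only
        user_sources = PipSource.get_user_sources(session, org_id=42, user_name="jdoe")
        # get_user_sources merges org-wide sources with the user's personal ones.
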
@@ -8,11 +8,12 @@ class GitSSH(Base):
 
     id = Column(Integer, primary_key=True, index=True, autoincrement=True)
     user_name = Column(String, ForeignKey('USER.user_name', ondelete="CASCADE"), nullable=False)
+    org_id = Column(Integer, ForeignKey("ORGANIZATION.id"), index=True, nullable=False)
     description = Column(String)
     key_name = Column(String, nullable=False)
     created_date = Column(DateTime, server_default=func.now(), nullable=False)
     last_used_date = Column(DateTime)
 
     __table_args__ = (
-        UniqueConstraint(user_name, key_name, name='user_name_key_name_unique'),
+        UniqueConstraint(user_name, key_name, org_id, name='user_name_key_name_org_id_unique'),
     )