dataflow-core 2.0.3__py3-none-any.whl → 2.0.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dataflow-core might be problematic. Click here for more details.

authenticator/__init__.py CHANGED
File without changes
@@ -1,34 +1,19 @@
1
- from .package.configuration import ConfigurationManager
2
1
  from .package.models.database import DatabaseManager
3
- from .package.models import (
4
- user as m_user,
5
- session as m_session
6
- )
2
+ from dataflow.dataflow import Dataflow
7
3
 
8
4
  from typing import Any, Callable
9
5
  from airflow.www.security import FabAirflowSecurityManagerOverride
6
+ from airflow.configuration import conf
10
7
 
11
8
  class DataflowAirflowAuthenticator(FabAirflowSecurityManagerOverride):
12
9
  def __init__(self, wsgi_app: Callable) -> None:
13
10
  self.wsgi_app = wsgi_app
11
+ self.dataflow = Dataflow()
12
+ self.airflow_database_url = conf.get("database", "sql_alchemy_conn")
14
13
 
15
- # Dataflow database configuration
16
- self.dataflow_config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
17
- self.dataflow_database_url = self.dataflow_config.get_config_value('database', 'database_url')
18
-
19
- self.dataflow_db_instance = DatabaseManager(self.dataflow_database_url)
20
- self.dataflow_db = next(self.dataflow_db_instance.get_session())
21
-
22
- # Airflow database configuration
23
- self.airflow_config = ConfigurationManager('airflow.cfg')
24
- self.airflow_database_url = self.airflow_config.get_config_value('database', 'sql_alchemy_conn')
25
-
26
14
  self.airflow_db_instance = DatabaseManager(self.airflow_database_url)
27
15
  self.airflow_db = next(self.airflow_db_instance.get_session())
28
16
 
29
- m_user.Base.metadata.create_all(bind=self.dataflow_db_instance.get_engine())
30
- m_session.Base.metadata.create_all(bind=self.dataflow_db_instance.get_engine())
31
-
32
17
  def __call__(self, environ: dict, start_response: Callable) -> Any:
33
18
 
34
19
  path = environ.get('PATH_INFO', '')
@@ -51,39 +36,28 @@ class DataflowAirflowAuthenticator(FabAirflowSecurityManagerOverride):
51
36
  user_session_id = user_session_id.split('=')[1]
52
37
 
53
38
  # Retrieving user details
54
- user_data = self.find_dataflow_user(user_session_id)
39
+ user_data = self.dataflow.auth(user_session_id)
55
40
 
56
41
  if user_data is None:
57
42
  raise Exception("No user found for the dataflow_session id")
58
43
 
59
- user = self.find_user(user_data.user_name)
44
+ user = self.find_user(user_data["user_name"])
60
45
 
61
46
  if not user:
62
- user_role = self.find_role(user_data.role.title())
63
- user = self.add_user(username=user_data.user_name, first_name=self.not_none(user_data.first_name), last_name=self.not_none(user_data.last_name), email=self.not_none(user_data.email), role=user_role, password=self.not_none(user_data.password))
47
+ user_role = self.find_role(user_data["role"].title())
48
+ user = self.add_user(
49
+ username=user_data["user_name"],
50
+ first_name=user_data.get("first_name", ""),
51
+ last_name=user_data.get("last_name", ""),
52
+ email=user_data.get("email", ""),
53
+ role=user_role
54
+ )
64
55
 
65
56
  environ['REMOTE_USER'] = user.username
66
- self.write_user_id(user_data.user_id)
67
57
  return self.wsgi_app(environ, start_response)
68
58
 
69
59
  except Exception as e:
70
60
  return self.wsgi_app(environ, start_response)
71
-
72
- def not_none(self, value):
73
- return value if value is not None else ""
74
-
75
- def find_dataflow_user(self, user_session_id):
76
- """Find user by session_id in dataflow database."""
77
- query = self.dataflow_db.query(m_session.Session_table)
78
- session = query.filter(m_session.Session_table.session_id == user_session_id).first()
79
- if session is None:
80
- return None
81
-
82
- user_data = self.dataflow_db.query(m_user.User).filter(m_user.User.user_id == session.user_id).first()
83
- if user_data is None:
84
- return None
85
-
86
- return user_data
87
61
 
88
62
  def find_user(self, username=None):
89
63
  """Find user by username or email."""
@@ -106,16 +80,4 @@ class DataflowAirflowAuthenticator(FabAirflowSecurityManagerOverride):
106
80
  self.airflow_db.add(user)
107
81
  self.airflow_db.commit()
108
82
  return user
109
-
110
- def write_user_id(self, user_id):
111
- """
112
- Write the given user_id to a file named dataflow_user_id.txt.
113
-
114
- Args:
115
- user_id (str): The user ID to be written to the file.
116
- """
117
- file_name = 'dataflow_user_id.txt'
118
- with open(file_name, 'w') as file:
119
- file.write(str(user_id))
120
-
121
83
 
File without changes
@@ -9,39 +9,30 @@ from .package.models import (
9
9
  user as m_user,
10
10
  session as m_session
11
11
  )
12
+ from dataflow.dataflow import Dataflow
12
13
 
13
14
  class CustomAuthDBView(AuthDBView):
14
15
  def __init__(self):
15
- self.dataflow_config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
16
- self.dataflow_database_url = self.dataflow_config.get_config_value('database', 'database_url')
17
- self.dataflow_db_instance = DatabaseManager(self.dataflow_database_url)
18
- self.dataflow_db = next(self.dataflow_db_instance.get_session())
19
- m_user.Base.metadata.create_all(bind=self.dataflow_db_instance.get_engine())
20
- m_session.Base.metadata.create_all(bind=self.dataflow_db_instance.get_engine())
21
-
22
- def _get_user_id_from_session(self, session_id):
23
- query = self.dataflow_db.query(m_session.Session_table).filter(m_session.Session_table.session_id == session_id).first()
24
- return query.user_id if query!=None else None
25
-
26
- def _get_user_details_from_user_table(self, user_id):
27
- user_details = self.dataflow_db.query(m_user.User).filter(m_user.User.user_id == user_id).first()
28
- return user_details if user_details!=None else None
29
-
30
- def not_none(self, value):
31
- return value if value is not None else ""
16
+ self.dataflow = Dataflow()
32
17
 
33
18
  @expose('/login/', methods=['GET'])
34
19
  def login(self):
35
20
  try:
36
21
  session_id = request.cookies.get('dataflow_session')
37
22
 
38
- user_id = self._get_user_id_from_session(session_id)
39
- user_details = self._get_user_details_from_user_table(user_id)
40
- user = self.appbuilder.sm.find_user(username=user_details.user_name)
23
+ user_details = self.dataflow.auth(session_id)
24
+ user = self.appbuilder.sm.find_user(username=user_details['user_name'])
41
25
  if user:
42
26
  login_user(user, remember=False)
43
27
  else:
44
- user = self.appbuilder.sm.add_user(username=self.not_none(user_details.user_name), first_name=self.not_none(user_details.first_name), last_name=self.not_none(user_details.last_name), email=self.not_none(user_details.email), role=self.appbuilder.sm.find_role('Admin'), password=self.not_none(user_details.password))
28
+ user = self.appbuilder.sm.add_user(
29
+ username=user_details['user_name'],
30
+ first_name=user_details.get("first_name", ""),
31
+ last_name=user_details.get("last_name", ""),
32
+ email=user_details.get("email", ""),
33
+ role=self.appbuilder.sm.find_role('Admin'),
34
+ password=""
35
+ )
45
36
  if user:
46
37
  login_user(user, remember=False)
47
38
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
dataflow/__init__.py CHANGED
@@ -0,0 +1 @@
1
+ from .dataflow import Dataflow
dataflow/configuration.py CHANGED
File without changes
dataflow/dataflow.py CHANGED
@@ -1,45 +1,40 @@
1
- import os
2
- from .configuration import ConfigurationManager
3
- from .models import (
4
- session as m_session,
5
- user as m_user,
6
- )
1
+ import os, requests
7
2
  from .models.database import DatabaseManager
8
3
  from sqlalchemy.inspection import inspect
9
4
  from .utils.aws_secrets_manager import SecretsManagerClient
10
- import json
5
+ import json, asyncio, pkg_resources
6
+ from authenticator.package.configuration import ConfigurationManager
11
7
 
12
8
 
13
9
  class Dataflow:
14
10
  def __init__(self):
15
- self.dataflow_config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
16
- self.dataflow_database_url = self.dataflow_config.get_config_value('database', 'database_url')
17
-
18
- self.dataflow_db_instance = DatabaseManager(self.dataflow_database_url)
19
- self.dataflow_db = next(self.dataflow_db_instance.get_session())
20
-
21
11
  self.secrets_manager = SecretsManagerClient('us-east-1')
22
12
 
23
- m_user.Base.metadata.create_all(bind=self.dataflow_db_instance.get_engine())
24
- m_session.Base.metadata.create_all(bind=self.dataflow_db_instance.get_engine())
25
-
26
13
  def auth(self, session_id: str):
27
- """Find user by session_id in dataflow database."""
14
+ """Retrieve user information from the auth API."""
28
15
  try:
29
- query = self.dataflow_db.query(m_session.Session_table)
30
- session = query.filter(m_session.Session_table.session_id == session_id).first()
31
- if session is None:
32
- return False
33
-
34
- user_data = self.dataflow_db.query(m_user.User).filter(m_user.User.user_id == session.user_id).first()
35
- if user_data is None:
36
- return False
16
+ dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
17
+ auth_api = dataflow_config.get_config_value('auth', 'ui_auth_api')
18
+ response = requests.get(
19
+ auth_api,
20
+ cookies={"dataflow_session": session_id, "jupyterhub-hub-login": ""}
21
+ )
37
22
 
38
- user_dict = {"user_name": user_data.user_name, "name": f"{user_data.first_name} {user_data.last_name}", "email": user_data.email, "role": user_data.role}
23
+ if response.status_code != 200:
24
+ return response.json()
25
+
26
+ user_data = response.json()
27
+ user_dict = {
28
+ "user_name": user_data["user_name"],
29
+ "first_name": user_data["first_name"],
30
+ "last_name": user_data["last_name"] if user_data.get("last_name") else "",
31
+ "email": user_data["email"],
32
+ "role": user_data["role"]
33
+ }
39
34
  return user_dict
40
-
35
+
41
36
  except Exception as e:
42
- return False
37
+ return e
43
38
 
44
39
  def variable(self, variable_name: str):
45
40
  """Get variable value from secrets manager."""
@@ -91,3 +86,42 @@ class Dataflow:
91
86
 
92
87
  except Exception as e:
93
88
  return None
89
+
90
+ async def create_env(self, env_name, py_version, py_requirements, status, env_version=None):
91
+ """
92
+ Creates a conda environment at the specified path and installs libraries in one command.
93
+ """
94
+ config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
95
+ status = status.lower()
96
+ if status == "published":
97
+ env_base_path = config.get_config_value('paths', 'published_env_path')
98
+ conda_env_path = os.path.join(env_base_path, env_name)
99
+ else:
100
+ env_base_path = config.get_config_value('paths', 'drafts_env_path')
101
+ conda_env_path = os.path.join(env_base_path, env_name, f"{env_name}_v{env_version}")
102
+ try:
103
+ if not os.path.exists(conda_env_path):
104
+ os.makedirs(conda_env_path, exist_ok=True)
105
+
106
+ py_requirements = ",".join(py_requirements)
107
+
108
+ script_path = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
109
+
110
+ # Make the script executable
111
+ os.chmod(script_path, 0o755)
112
+
113
+ # Prepare command with arguments
114
+ command = ["bash", script_path, py_requirements, conda_env_path, py_version]
115
+
116
+ process = await asyncio.create_subprocess_exec(
117
+ *command,
118
+ stdout=asyncio.subprocess.PIPE,
119
+ stderr=asyncio.subprocess.PIPE
120
+ )
121
+
122
+ return process
123
+ except OSError as e:
124
+ print(f"OS error while creating {conda_env_path}: {e}")
125
+ except Exception as e:
126
+ print(f"Unexpected error while creating {conda_env_path}: {e}")
127
+ return {"error": str(e)}
File without changes
File without changes
File without changes
dataflow/models/user.py CHANGED
File without changes
@@ -0,0 +1,34 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ IFS=',' read -r -a libraries <<< $1
5
+ conda_env_path=$2
6
+ py_version=$3
7
+
8
+ # Use an isolated conda package cache to avoid concurrency issues
9
+ export CONDA_PKGS_DIRS=$(mktemp -d)
10
+ # to delete conda package cache after script finishes
11
+ trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
12
+
13
+ # 1. Creating conda environment
14
+ conda create --prefix ${conda_env_path} --yes python=${py_version}
15
+ ${conda_env_path}/bin/pip install --root-user-action ignore ${libraries[@]}
16
+
17
+ # 3. Install Dataflow Airflow to a separate path in environment
18
+ ${conda_env_path}/bin/pip install \
19
+ --force-reinstall --root-user-action ignore \
20
+ --no-warn-conflicts dataflow-airflow==2.10.3 \
21
+ --target ${conda_env_path}/bin/airflow-libraries/ \
22
+ --constraint https://raw.githubusercontent.com//apache/airflow/constraints-2.10.3/constraints-${py_version}.txt || true
23
+
24
+ files=(
25
+ ${conda_env_path}/lib/python${py_version}/site-packages/dbt/config/profile.py
26
+ ${conda_env_path}/lib/python${py_version}/site-packages/dbt/task/debug.py
27
+ )
28
+ for file in ${files[@]}
29
+ do
30
+ awk '{gsub("from dbt.clients.yaml_helper import load_yaml_text", "from dbt.dataflow_config.secrets_manager import load_yaml_text"); print}' $file > temp
31
+ mv temp $file
32
+ done
33
+
34
+ echo "Environment Creation Successful"
File without changes
File without changes
@@ -1,11 +1,18 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataflow-core
3
- Version: 2.0.3
3
+ Version: 2.0.6
4
4
  Summary: Dataflow core package
5
+ Home-page: UNKNOWN
5
6
  Author: Dataflow
6
- Author-email:
7
+ Author-email: UNKNOWN
8
+ License: UNKNOWN
9
+ Platform: UNKNOWN
7
10
  Requires-Dist: sqlalchemy
8
11
  Requires-Dist: boto3
9
12
  Requires-Dist: psycopg2-binary
10
13
  Requires-Dist: pymysql
14
+ Requires-Dist: requests
15
+
16
+ UNKNOWN
17
+
11
18
 
@@ -1,24 +1,25 @@
1
1
  authenticator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- authenticator/dataflowairflowauthenticator.py,sha256=wzd807UAb5ojIVMO8FVHQnNi3Tc78NjkhbDSfzgo2Qg,4790
2
+ authenticator/dataflowairflowauthenticator.py,sha256=PcGlL2cq5EA9RdtClyrOupvNXxk6h54UPQcQ-g4VQtA,3117
3
3
  authenticator/dataflowhubauthenticator.py,sha256=UgjZiXSP-hlbciyJVb-QIgUjS3_EgX0WIvbhD563rVw,2918
4
- authenticator/dataflowsupersetauthenticator.py,sha256=UNgem68eg8G-1uAok2yVc6NvUeXaJ-9MPmdpfSyr9Jg,2759
4
+ authenticator/dataflowsupersetauthenticator.py,sha256=M2pNpIdmWwzJVuMQ6trXLWQT4HpcYT84bC93uOSm0fQ,1772
5
5
  authenticator/package/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  authenticator/package/configuration.py,sha256=7To6XwH1eESiYp39eqPcswXWwrdBUdPF6xN6WnazOF0,663
7
7
  authenticator/package/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  authenticator/package/models/database.py,sha256=y09pqnglsBBtDZlyhvqDAlpUSFovwAzBAi6jOYl_XNk,896
9
9
  authenticator/package/models/session.py,sha256=j6PhbrTMJxEkeDT4Vf5SqGtM_LI_vZy9O4vxn6LtIbc,495
10
10
  authenticator/package/models/user.py,sha256=IYogp_vt0yDBG5i936uNPjgTis77VYPzITn9XpQUIyw,788
11
- dataflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ dataflow/__init__.py,sha256=WTRg8HMpMWSgxYJ9ZGVldx4k07fAbta3mBmZ1hG9mWE,30
12
12
  dataflow/configuration.py,sha256=7To6XwH1eESiYp39eqPcswXWwrdBUdPF6xN6WnazOF0,663
13
- dataflow/dataflow.py,sha256=CAn47BkdUwmTWe-kTSFKfRZYnjx29MG0SrYsXpvSkuo,3726
13
+ dataflow/dataflow.py,sha256=viEKxu1bVFtenbPx8etCb7Gf-Hbwv0KvkE7occVbX8I,5136
14
14
  dataflow/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  dataflow/models/database.py,sha256=y09pqnglsBBtDZlyhvqDAlpUSFovwAzBAi6jOYl_XNk,896
16
16
  dataflow/models/session.py,sha256=C9crPh6ZDFuL27hZ_zhUXDZZ0ZiIDE8ZD19O_4WPw-I,488
17
17
  dataflow/models/user.py,sha256=IYogp_vt0yDBG5i936uNPjgTis77VYPzITn9XpQUIyw,788
18
+ dataflow/scripts/create_environment.sh,sha256=r4cKRTMf1G9pXx1fqMpzkvd-ROzUH0Qp16f9t84Nh5c,1300
18
19
  dataflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
20
  dataflow/utils/aws_secrets_manager.py,sha256=FqHm3YRynv580FpFsS0RfI1MSGY5aq-V7t4blpiYsS4,2588
20
- dataflow_core-2.0.3.dist-info/METADATA,sha256=N3tOAUkMWoSaOXpJn6UkzZ-p9Rpfb51xYVmAOEmJBX8,222
21
- dataflow_core-2.0.3.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
22
- dataflow_core-2.0.3.dist-info/entry_points.txt,sha256=ppj_EIbYrJJwCPg1kfdsZk5q1N-Ejfis1neYrnjhO8o,117
23
- dataflow_core-2.0.3.dist-info/top_level.txt,sha256=SZsUOpSCK9ntUy-3Tusxzf5A2e8ebwD8vouPb1dPt_8,23
24
- dataflow_core-2.0.3.dist-info/RECORD,,
21
+ dataflow_core-2.0.6.dist-info/METADATA,sha256=BCXu55SZS71k7ZQ2ilfFn_nimogjDv0SDKtrDyje5iw,317
22
+ dataflow_core-2.0.6.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
23
+ dataflow_core-2.0.6.dist-info/entry_points.txt,sha256=lDLG2MMWlKfkqsVWFghF7sx-yEvM2xqMmHE7rMTysE4,118
24
+ dataflow_core-2.0.6.dist-info/top_level.txt,sha256=SZsUOpSCK9ntUy-3Tusxzf5A2e8ebwD8vouPb1dPt_8,23
25
+ dataflow_core-2.0.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (71.1.0)
2
+ Generator: bdist_wheel (0.45.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,2 +1,3 @@
1
1
  [jupyterhub.authenticators]
2
2
  dataflow_authenticator = authenticator.dataflowhubauthenticator:DataflowHubAuthenticator
3
+