dataflow-core 2.0.5__tar.gz → 2.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. dataflow_core-2.0.7/PKG-INFO +14 -0
  2. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/dataflowhubauthenticator.py +17 -1
  3. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow/dataflow.py +43 -4
  4. dataflow_core-2.0.7/dataflow/scripts/create_environment.sh +34 -0
  5. dataflow_core-2.0.7/dataflow_core.egg-info/PKG-INFO +14 -0
  6. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow_core.egg-info/SOURCES.txt +1 -2
  7. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow_core.egg-info/entry_points.txt +0 -1
  8. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/setup.py +6 -4
  9. dataflow-core-2.0.5/PKG-INFO +0 -10
  10. dataflow-core-2.0.5/dataflow/models/session.py +0 -17
  11. dataflow-core-2.0.5/dataflow/models/user.py +0 -23
  12. dataflow-core-2.0.5/dataflow_core.egg-info/PKG-INFO +0 -10
  13. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/README.md +0 -0
  14. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/__init__.py +0 -0
  15. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/dataflowairflowauthenticator.py +0 -0
  16. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/dataflowsupersetauthenticator.py +0 -0
  17. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/package/__init__.py +0 -0
  18. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/package/configuration.py +0 -0
  19. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/package/models/__init__.py +0 -0
  20. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/package/models/database.py +0 -0
  21. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/package/models/session.py +0 -0
  22. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/authenticator/package/models/user.py +0 -0
  23. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow/__init__.py +0 -0
  24. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow/configuration.py +0 -0
  25. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow/models/__init__.py +0 -0
  26. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow/models/database.py +0 -0
  27. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow/utils/__init__.py +0 -0
  28. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow/utils/aws_secrets_manager.py +0 -0
  29. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow_core.egg-info/dependency_links.txt +0 -0
  30. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow_core.egg-info/requires.txt +0 -0
  31. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/dataflow_core.egg-info/top_level.txt +0 -0
  32. {dataflow-core-2.0.5 → dataflow_core-2.0.7}/setup.cfg +0 -0
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataflow-core
3
+ Version: 2.0.7
4
+ Summary: Dataflow core package
5
+ Author: Dataflow
6
+ Author-email:
7
+ Requires-Dist: sqlalchemy
8
+ Requires-Dist: boto3
9
+ Requires-Dist: psycopg2-binary
10
+ Requires-Dist: pymysql
11
+ Requires-Dist: requests
12
+ Dynamic: author
13
+ Dynamic: requires-dist
14
+ Dynamic: summary
@@ -65,7 +65,23 @@ class DataflowHubAuthenticator(Authenticator):
65
65
  self.db.refresh(db_item)
66
66
 
67
67
  expires = datetime.now(timezone.utc) + timedelta(days=365)
68
- handler.set_cookie("dataflow_session", session_id, expires=expires)
68
+ host = handler.request.host
69
+ parts = host.split('.')
70
+ if len(parts) >= 2:
71
+ domain = '.'.join(parts[-2:])
72
+ else:
73
+ domain = host
74
+ base_domain = f".{domain}"
75
+ handler.set_cookie(
76
+ "dataflow_session",
77
+ session_id,
78
+ domain=base_domain,
79
+ path="/",
80
+ expires=expires,
81
+ secure=True,
82
+ httponly=True,
83
+ samesite="None"
84
+ )
69
85
  user_dict = {"name": username, "session_id": session_id}
70
86
  return user_dict
71
87
 
@@ -2,21 +2,21 @@ import os, requests
2
2
  from .models.database import DatabaseManager
3
3
  from sqlalchemy.inspection import inspect
4
4
  from .utils.aws_secrets_manager import SecretsManagerClient
5
- import json
5
+ import json, asyncio, pkg_resources
6
6
  from authenticator.package.configuration import ConfigurationManager
7
7
 
8
8
 
9
9
  class Dataflow:
10
10
  def __init__(self):
11
- self.dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
12
- self.auth_api = self.dataflow_config.get_config_value('auth', 'ui_auth_api')
13
11
  self.secrets_manager = SecretsManagerClient('us-east-1')
14
12
 
15
13
  def auth(self, session_id: str):
16
14
  """Retrieve user information from the auth API."""
17
15
  try:
16
+ dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
17
+ auth_api = dataflow_config.get_config_value('auth', 'ui_auth_api')
18
18
  response = requests.get(
19
- self.auth_api,
19
+ auth_api,
20
20
  cookies={"dataflow_session": session_id, "jupyterhub-hub-login": ""}
21
21
  )
22
22
 
@@ -86,3 +86,42 @@ class Dataflow:
86
86
 
87
87
  except Exception as e:
88
88
  return None
89
+
90
+ async def create_env(self, env_name, py_version, py_requirements, status, env_version=None):
91
+ """
92
+ Creates a conda environment at the specified path and installs libraries in one command.
93
+ """
94
+ config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
95
+ status = status.lower()
96
+ if status == "published":
97
+ env_base_path = config.get_config_value('paths', 'published_env_path')
98
+ conda_env_path = os.path.join(env_base_path, env_name)
99
+ else:
100
+ env_base_path = config.get_config_value('paths', 'drafts_env_path')
101
+ conda_env_path = os.path.join(env_base_path, env_name, f"{env_name}_v{env_version}")
102
+ try:
103
+ if not os.path.exists(conda_env_path):
104
+ os.makedirs(conda_env_path, exist_ok=True)
105
+
106
+ py_requirements = ",".join(py_requirements)
107
+
108
+ script_path = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
109
+
110
+ # Make the script executable
111
+ os.chmod(script_path, 0o755)
112
+
113
+ # Prepare command with arguments
114
+ command = ["bash", script_path, py_requirements, conda_env_path, py_version]
115
+
116
+ process = await asyncio.create_subprocess_exec(
117
+ *command,
118
+ stdout=asyncio.subprocess.PIPE,
119
+ stderr=asyncio.subprocess.PIPE
120
+ )
121
+
122
+ return process
123
+ except OSError as e:
124
+ print(f"OS error while creating {conda_env_path}: {e}")
125
+ except Exception as e:
126
+ print(f"Unexpected error while creating {conda_env_path}: {e}")
127
+ return {"error": str(e)}
@@ -0,0 +1,34 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ IFS=',' read -r -a libraries <<< $1
5
+ conda_env_path=$2
6
+ py_version=$3
7
+
8
+ # Use an isolated conda package cache to avoid concurrency issues
9
+ export CONDA_PKGS_DIRS=$(mktemp -d)
10
+ # to delete conda package cache after script finishes
11
+ trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
12
+
13
+ # 1. Creating conda environment
14
+ conda create --prefix ${conda_env_path} --yes python=${py_version}
15
+ ${conda_env_path}/bin/pip install --root-user-action ignore ${libraries[@]}
16
+
17
+ # 3. Install Dataflow Airflow to a separate path in environment
18
+ ${conda_env_path}/bin/pip install \
19
+ --force-reinstall --root-user-action ignore \
20
+ --no-warn-conflicts dataflow-airflow==2.10.5 \
21
+ --target ${conda_env_path}/bin/airflow-libraries/ \
22
+ --constraint https://raw.githubusercontent.com//apache/airflow/constraints-2.10.5/constraints-${py_version}.txt || true
23
+
24
+ files=(
25
+ ${conda_env_path}/lib/python${py_version}/site-packages/dbt/config/profile.py
26
+ ${conda_env_path}/lib/python${py_version}/site-packages/dbt/task/debug.py
27
+ )
28
+ for file in ${files[@]}
29
+ do
30
+ awk '{gsub("from dbt.clients.yaml_helper import load_yaml_text", "from dbt.dataflow_config.secrets_manager import load_yaml_text"); print}' $file > temp
31
+ mv temp $file
32
+ done
33
+
34
+ echo "Environment Creation Successful"
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataflow-core
3
+ Version: 2.0.7
4
+ Summary: Dataflow core package
5
+ Author: Dataflow
6
+ Author-email:
7
+ Requires-Dist: sqlalchemy
8
+ Requires-Dist: boto3
9
+ Requires-Dist: psycopg2-binary
10
+ Requires-Dist: pymysql
11
+ Requires-Dist: requests
12
+ Dynamic: author
13
+ Dynamic: requires-dist
14
+ Dynamic: summary
@@ -15,8 +15,7 @@ dataflow/configuration.py
15
15
  dataflow/dataflow.py
16
16
  dataflow/models/__init__.py
17
17
  dataflow/models/database.py
18
- dataflow/models/session.py
19
- dataflow/models/user.py
18
+ dataflow/scripts/create_environment.sh
20
19
  dataflow/utils/__init__.py
21
20
  dataflow/utils/aws_secrets_manager.py
22
21
  dataflow_core.egg-info/PKG-INFO
@@ -1,3 +1,2 @@
1
1
  [jupyterhub.authenticators]
2
2
  dataflow_authenticator = authenticator.dataflowhubauthenticator:DataflowHubAuthenticator
3
-
@@ -2,10 +2,12 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="dataflow-core",
5
- version="2.0.5",
6
- packages=find_packages(exclude=["tests", "tests.*"]),
5
+ version="2.0.7",
6
+ packages=find_packages(include=["dataflow", "dataflow.*", "authenticator", "authenticator.*"]),
7
7
  include_package_data=True,
8
- package_data={},
8
+ package_data={
9
+ "dataflow": ["scripts/*.sh"],
10
+ },
9
11
  install_requires=[
10
12
  'sqlalchemy',
11
13
  'boto3',
@@ -21,4 +23,4 @@ setup(
21
23
  'dataflow_authenticator = authenticator.dataflowhubauthenticator:DataflowHubAuthenticator',
22
24
  ],
23
25
  },
24
- )
26
+ )
@@ -1,10 +0,0 @@
1
- Metadata-Version: 1.0
2
- Name: dataflow-core
3
- Version: 2.0.5
4
- Summary: Dataflow core package
5
- Home-page: UNKNOWN
6
- Author: Dataflow
7
- Author-email: UNKNOWN
8
- License: UNKNOWN
9
- Description: UNKNOWN
10
- Platform: UNKNOWN
@@ -1,17 +0,0 @@
1
- """models.py"""
2
- from sqlalchemy import Column, Integer, String
3
- from sqlalchemy.ext.declarative import declarative_base
4
-
5
- #instance for create declarative base
6
- Base=declarative_base()
7
-
8
- class Session_table(Base):
9
- """
10
- Table SESSIONS
11
- """
12
-
13
- __tablename__='SESSION'
14
-
15
- id = Column(Integer, primary_key=True, index=True, unique=True, nullable=False, autoincrement=True)
16
- session_id = Column(String, unique=True, nullable=False)
17
- user_id = Column(String, nullable=False)
@@ -1,23 +0,0 @@
1
- """models.py"""
2
- from sqlalchemy import Column, Integer, String, LargeBinary, Enum
3
- from sqlalchemy.ext.declarative import declarative_base
4
-
5
- #instance for create declarative base
6
- Base=declarative_base()
7
-
8
- class User(Base):
9
- """
10
- Table USER
11
- """
12
-
13
- __tablename__='USER'
14
-
15
- user_id = Column(Integer, primary_key=True, index=True, autoincrement=True, nullable=False)
16
- user_name = Column(String, unique=True, nullable=False)
17
- first_name = Column(String)
18
- last_name = Column(String)
19
- email = Column(String, unique=True)
20
- role = Column(Enum('admin', 'user', name='role_field'), nullable=False)
21
- image = Column(LargeBinary)
22
- active = Column(Enum('N', 'Y', name='active_field'), nullable=False, server_default=str("N"))
23
- password = Column(String, nullable=False)
@@ -1,10 +0,0 @@
1
- Metadata-Version: 1.0
2
- Name: dataflow-core
3
- Version: 2.0.5
4
- Summary: Dataflow core package
5
- Home-page: UNKNOWN
6
- Author: Dataflow
7
- Author-email: UNKNOWN
8
- License: UNKNOWN
9
- Description: UNKNOWN
10
- Platform: UNKNOWN
File without changes
File without changes