altasigma 0.0.4.post2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- altasigma-0.0.4.post2/LICENSE +2 -0
- altasigma-0.0.4.post2/MANIFEST.in +2 -0
- altasigma-0.0.4.post2/PKG-INFO +51 -0
- altasigma-0.0.4.post2/README_pypi.md +20 -0
- altasigma-0.0.4.post2/altasigma/__init__.py +35 -0
- altasigma-0.0.4.post2/altasigma/config/__init__.py +6 -0
- altasigma-0.0.4.post2/altasigma/config/config.py +306 -0
- altasigma-0.0.4.post2/altasigma/config/http_session.py +67 -0
- altasigma-0.0.4.post2/altasigma/credentials/__init__.py +8 -0
- altasigma-0.0.4.post2/altasigma/credentials/credential_utils.py +394 -0
- altasigma-0.0.4.post2/altasigma/credentials/token_refresher.py +31 -0
- altasigma-0.0.4.post2/altasigma/initialize.py +26 -0
- altasigma-0.0.4.post2/altasigma/io/__init__.py +24 -0
- altasigma-0.0.4.post2/altasigma/io/augurdata.py +135 -0
- altasigma-0.0.4.post2/altasigma/io/data_management.py +588 -0
- altasigma-0.0.4.post2/altasigma/io/files.py +114 -0
- altasigma-0.0.4.post2/altasigma/jobsupervisor/__init__.py +35 -0
- altasigma-0.0.4.post2/altasigma/jobsupervisor/job_supervisor_abstract.py +147 -0
- altasigma-0.0.4.post2/altasigma/jobsupervisor/job_supervisor_dev_mock.py +142 -0
- altasigma-0.0.4.post2/altasigma/jobsupervisor/job_supervisor_helpers.py +61 -0
- altasigma-0.0.4.post2/altasigma/jobsupervisor/job_supervisor_http.py +282 -0
- altasigma-0.0.4.post2/altasigma/jobsupervisor/reports.py +35 -0
- altasigma-0.0.4.post2/altasigma/progress_reporter/__init__.py +10 -0
- altasigma-0.0.4.post2/altasigma/progress_reporter/progress_reporter.py +208 -0
- altasigma-0.0.4.post2/altasigma/spark_session/__init__.py +8 -0
- altasigma-0.0.4.post2/altasigma/spark_session/spark_session_util.py +174 -0
- altasigma-0.0.4.post2/altasigma/utils/__init__.py +8 -0
- altasigma-0.0.4.post2/altasigma/utils/notebook.py +61 -0
- altasigma-0.0.4.post2/altasigma.egg-info/PKG-INFO +51 -0
- altasigma-0.0.4.post2/altasigma.egg-info/SOURCES.txt +33 -0
- altasigma-0.0.4.post2/altasigma.egg-info/dependency_links.txt +1 -0
- altasigma-0.0.4.post2/altasigma.egg-info/requires.txt +4 -0
- altasigma-0.0.4.post2/altasigma.egg-info/top_level.txt +1 -0
- altasigma-0.0.4.post2/setup.cfg +4 -0
- altasigma-0.0.4.post2/setup.py +32 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: altasigma
|
|
3
|
+
Version: 0.0.4.post2
|
|
4
|
+
Summary: Python helpers for data science and ML workflows on the AltaSigma AI platform
|
|
5
|
+
Home-page: https://www.altasigma.com
|
|
6
|
+
Author: AltaSigma GmbH
|
|
7
|
+
Author-email: pypi@altasigma.com
|
|
8
|
+
License: Proprietary
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: requests<3,>=2
|
|
17
|
+
Requires-Dist: pandas>=1
|
|
18
|
+
Requires-Dist: ipython>=8
|
|
19
|
+
Requires-Dist: boto3<2,>=1.18.49
|
|
20
|
+
Dynamic: author
|
|
21
|
+
Dynamic: author-email
|
|
22
|
+
Dynamic: classifier
|
|
23
|
+
Dynamic: description
|
|
24
|
+
Dynamic: description-content-type
|
|
25
|
+
Dynamic: home-page
|
|
26
|
+
Dynamic: license
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
Dynamic: requires-dist
|
|
29
|
+
Dynamic: requires-python
|
|
30
|
+
Dynamic: summary
|
|
31
|
+
|
|
32
|
+
# AltaSigma Python Package
|
|
33
|
+
|
|
34
|
+
A Python library for working with the AltaSigma AI platform.
|
|
35
|
+
This package provides convenient tools for data scientists and developers to manage data,
|
|
36
|
+
credentials, and ML workflows on AltaSigma infrastructure.
|
|
37
|
+
|
|
38
|
+
## What is AltaSigma?
|
|
39
|
+
|
|
40
|
+
AltaSigma is an AI platform that provides managed infrastructure for data science and machine learning workloads.
|
|
41
|
+
Check https://www.altasigma.com/ to learn more.
|
|
42
|
+
|
|
43
|
+
## What does this package do?
|
|
44
|
+
|
|
45
|
+
This helper library simplifies common tasks when working on AltaSigma:
|
|
46
|
+
|
|
47
|
+
- **Data Access**: Read and write data from AltaSigma data sources
|
|
48
|
+
- **Credential Management**: Handle authentication tokens and credentials securely
|
|
49
|
+
- **Job Supervision**: Manage training, evaluation, and prediction workflows
|
|
50
|
+
- **Configuration**: Environment-aware settings for different job types
|
|
51
|
+
- **Progress Tracking**: Report progress for long-running operations
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# AltaSigma Python Package
|
|
2
|
+
|
|
3
|
+
A Python library for working with the AltaSigma AI platform.
|
|
4
|
+
This package provides convenient tools for data scientists and developers to manage data,
|
|
5
|
+
credentials, and ML workflows on AltaSigma infrastructure.
|
|
6
|
+
|
|
7
|
+
## What is AltaSigma?
|
|
8
|
+
|
|
9
|
+
AltaSigma is an AI platform that provides managed infrastructure for data science and machine learning workloads.
|
|
10
|
+
Check https://www.altasigma.com/ to learn more.
|
|
11
|
+
|
|
12
|
+
## What does this package do?
|
|
13
|
+
|
|
14
|
+
This helper library simplifies common tasks when working on AltaSigma:
|
|
15
|
+
|
|
16
|
+
- **Data Access**: Read and write data from AltaSigma data sources
|
|
17
|
+
- **Credential Management**: Handle authentication tokens and credentials securely
|
|
18
|
+
- **Job Supervision**: Manage training, evaluation, and prediction workflows
|
|
19
|
+
- **Configuration**: Environment-aware settings for different job types
|
|
20
|
+
- **Progress Tracking**: Report progress for long-running operations
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""AltaSigma package for data management and processing.
|
|
2
|
+
|
|
3
|
+
This package provides tools for credential management, data source operations,
|
|
4
|
+
configuration management, and job supervision.
|
|
5
|
+
|
|
6
|
+
The public API components are explicitly imported and exposed at the package level.
|
|
7
|
+
Users should import components directly from the `altasigma` package rather than
|
|
8
|
+
from individual submodules.
|
|
9
|
+
|
|
10
|
+
Examples:
|
|
11
|
+
Recommended import style:
|
|
12
|
+
|
|
13
|
+
>>> from altasigma import CredentialUtils, AltaSigma, S3DataSource
|
|
14
|
+
|
|
15
|
+
Avoid importing from submodules directly:
|
|
16
|
+
|
|
17
|
+
>>> # Not recommended
|
|
18
|
+
>>> from altasigma.credentials.credential_utils import CredentialUtils
|
|
19
|
+
|
|
20
|
+
Note:
|
|
21
|
+
Only components explicitly imported in this file are considered part of the
|
|
22
|
+
public API. Other components within submodules should be treated as internal
|
|
23
|
+
implementation details that may change without notice.
|
|
24
|
+
|
|
25
|
+
The only exception to that is altasigma.spark_session.spark_session_util, which contains
|
|
26
|
+
imports from pyspark. Not importing it here makes pyspark an optional dependency.
|
|
27
|
+
"""
|
|
28
|
+
from altasigma.credentials.credential_utils import CredentialUtils
|
|
29
|
+
from altasigma.initialize import AltaSigma, initialize
|
|
30
|
+
from altasigma.io.data_management import S3DataSource, Bucket, CassandraDataSource, BiographyInfo, BiographyInfoEntry, TextEntry, S3PathEntry, S3Data, CassandraTableEntry, CassandraData, get_datasource
|
|
31
|
+
from altasigma.config.config import RunEnv, JobType
|
|
32
|
+
from altasigma.jobsupervisor.reports import dataframe_to_table_report_data
|
|
33
|
+
# Spark is an optional dependency. If we add an import here it is no longer optional
|
|
34
|
+
# from altasigma.spark_session.spark_session_util import get_spark_session
|
|
35
|
+
from altasigma.progress_reporter.progress_reporter import ProgressReporter
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Python module for configuration management in AltaSigma Modules.
|
|
3
|
+
|
|
4
|
+
This module defines job types, environment settings, and configuration classes
|
|
5
|
+
for various components of the system.
|
|
6
|
+
"""
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
import threading
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from enum import Enum
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from ..credentials.credential_utils import _credential_utils
|
|
17
|
+
from ..credentials.token_refresher import TokenRefresher
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class JobType(Enum):
    """Enumeration of job types for Modules/Augurs.

    Attributes:
        Learning (str): Job type for learning jobs.
        Evaluation (str): Job type for evaluation jobs.
        Prediction (str): Job type for prediction jobs.
        RealtimeScoring (str): Job type for real-time prediction jobs.
    """

    Learning = "learning"
    Evaluation = "evaluation"
    Prediction = "prediction"
    RealtimeScoring = "realtime-scoring"

    @classmethod
    def from_name(cls, name):
        """Convert a string name to its corresponding JobType member.

        The lookup is by member *value* (e.g. ``"realtime-scoring"``), not by
        the Python attribute name.

        Args:
            name (str): The string name of the job type.

        Returns:
            JobType: The corresponding JobType member.

        Raises:
            ValueError: If the provided name does not match any JobType.
        """
        for member in cls:
            if member.value == name:
                return member
        raise ValueError(f"{name} is not a valid JobType name.")

    def to_name(self):
        """Return the string representation (the member value) of the job type.

        Returns:
            str: The string name of the job type.
        """
        return self.value

    def is_batch_job(self):
        """Tell whether the job type is a batch job.

        Returns:
            bool: True if the job type is Learning, Evaluation, or Prediction,
                False otherwise.
        """
        return self in (JobType.Learning, JobType.Evaluation, JobType.Prediction)

    def is_passed_model_in_env_in_prod(self):
        """Tell whether a model is passed to this job type in production.

        Returns:
            bool: True if the job type is RealtimeScoring, False otherwise.
        """
        return self is JobType.RealtimeScoring

    def is_passed_model_in_env_in_dev(self):
        """Tell whether a model is passed to this job type in development.

        Returns:
            bool: True if the job type is Evaluation, Prediction, or
                RealtimeScoring, False otherwise.
        """
        return self in (JobType.Evaluation, JobType.Prediction, JobType.RealtimeScoring)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class RunEnv(Enum):
    """Enumeration of running environments.

    Attributes:
        Dev (str): Development environment.
        Prod (str): Production environment.
    """

    Dev = "dev"
    Prod = "prod"

    @classmethod
    def from_name(cls, name):
        """Convert a string name to its corresponding RunEnv member.

        The lookup is by member *value* (``"dev"`` / ``"prod"``).

        Args:
            name (str): The string name of the run environment.

        Returns:
            RunEnv: The corresponding RunEnv member.

        Raises:
            ValueError: If the provided name does not match any RunEnv.
        """
        for member in cls:
            if member.value == name:
                return member
        raise ValueError(f"{name} is not a valid RunEnv name.")

    def to_name(self):
        """Return the string representation (the member value) of the run environment.

        Returns:
            str: The string name of the run environment.
        """
        return self.value
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass()
class JobConfig:
    """Configuration for a job.

    Attributes:
        augur_code (str | None): Identifier for the augur. ModuleConfig only
            reads it from the environment in the prod run environment and
            passes None otherwise.
        job_type (JobType): Type of the job.
        job_code (str | None): Identifier for the job, None if not a batch job.
        model_code (str | None): Identifier for the model. None at the start if
            JobType.Learning. Defaults to None.
        settings_code (str | None): Identifier for settings. Only set for
            realtime_prediction. Defaults to None.
        realtime_server_host (str | None): Host for the realtime server.
            Defaults to None.
        realtime_server_port (str | int | None): Port for the realtime server.
            Defaults to None. ModuleConfig passes a str when the value comes
            from the environment and the int 5000 as its fallback.
    """

    augur_code: str | None
    job_type: JobType
    # None if not a batch job
    job_code: str | None
    # None at the start if JobType.Learning, because that creates a model_code,
    # which will be received from the JobSupervisor
    model_code: str | None = None
    # Only set for realtime_prediction since that needs a stable identifier
    settings_code: str | None = None
    realtime_server_host: str | None = None
    realtime_server_port: str | int | None = None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclass(frozen=True)
class DashboardConfig:
    """Configuration for the dashboard API.

    Instances are immutable (frozen dataclass).

    Attributes:
        api_host (str): Hostname for the dashboard API.
        api_port (int): Port number for the dashboard API.
    """

    api_host: str
    # NOTE(review): ModuleConfig passes os.environ["DASHBOARD_API_PORT"] here,
    # which is a str; no conversion to int happens in this class.
    api_port: int
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@dataclass(frozen=True)
class DataManConfig:
    """Configuration for the data management API.

    Instances are immutable (frozen dataclass).

    Attributes:
        api_host (str): Hostname for the data management API.
        api_port (int): Port number for the data management API.
    """

    api_host: str
    # NOTE(review): _data_man_config() passes os.environ["DATA_MAN_API_PORT"]
    # here, which is a str; no conversion to int happens in this class.
    api_port: int
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def generate_dev_job_code(job_type: JobType) -> str:
    """Generate a job code for the development environment.

    The code has the form ``<YYYYMMDD>T<HHMMSS>Z_<JOB TYPE>``, where the job
    type is the upper-cased string name of ``job_type``.

    Args:
        job_type (JobType): Type of the job.

    Returns:
        str: A generated job code string.
    """
    # Imported locally because the module only imports `datetime` itself.
    from datetime import timezone

    # The trailing "Z" designates UTC, so the timestamp must be taken in UTC
    # rather than in the machine's local time zone.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    return f"{timestamp}_{job_type.to_name().upper()}"
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@dataclass
class ModuleConfig:
    """Main configuration class for the module.

    This class holds all configuration parameters for the module,
    including job, dashboard, and data management configurations.

    Attributes:
        run_env (RunEnv): Running environment, either "dev" or "prod".
        job (JobConfig): Job configuration.
        dashboard (DashboardConfig | None): Dashboard API configuration.
        data_man (DataManConfig | None): Data management API configuration.
        package_log_level (int): Logging level for the package.
        root_log_level (int): Root logging level.
    """

    # "dev" | "prod"
    run_env: RunEnv

    job: JobConfig
    dashboard: DashboardConfig | None
    data_man: DataManConfig | None

    # NOTE(review): neither log level is assigned in __init__ below;
    # presumably they are set by other code after construction - confirm.
    package_log_level: int
    root_log_level: int

    def __init__(self):
        """Initialize a ModuleConfig instance from environment variables.

        Reads JOB_ENV and JOB_ARG_JOB_TYPE (always required) and, depending on
        run environment and job type, JOB_ARG_AUGUR_CODE, JOB_ARG_JOB_CODE,
        JOB_ARG_MODEL_CODE, JOB_ARG_SETTINGS_CODE, DASHBOARD_API_HOST and
        DASHBOARD_API_PORT. JOB_ARG_REALTIME_SERVER_HOST / _PORT and AS_TOKEN
        are optional.

        Side effects: sets the CLIENT_ID environment variable, in the dev
        environment writes the AS_TOKEN refresh token via the credential
        utilities and removes CLIENT_SECRET from the environment, and
        schedules a token refresh.

        Raises:
            KeyError: If a required environment variable is missing.
            ValueError: If JOB_ENV or JOB_ARG_JOB_TYPE hold an unknown value.
        """
        # General config parameters
        realtime_server_host = os.environ.get("JOB_ARG_REALTIME_SERVER_HOST", "0.0.0.0")
        # The fallback is the int 5000, a value from the environment is a str.
        realtime_server_port = os.environ.get("JOB_ARG_REALTIME_SERVER_PORT", 5000)

        # Always given
        self.run_env = RunEnv.from_name(os.environ["JOB_ENV"])
        job_type = JobType.from_name(os.environ["JOB_ARG_JOB_TYPE"])

        # Each of the following is either set in the environment, can be
        # generated, or is not required.
        augur_code = None
        if self.run_env == RunEnv.Prod:
            augur_code = os.environ["JOB_ARG_AUGUR_CODE"]

        job_code = None
        if job_type.is_batch_job():
            if self.run_env == RunEnv.Prod:
                job_code = os.environ["JOB_ARG_JOB_CODE"]
            elif self.run_env == RunEnv.Dev:
                job_code = generate_dev_job_code(job_type)

        model_code = None
        if (self.run_env == RunEnv.Prod and job_type.is_passed_model_in_env_in_prod()) or (
                self.run_env == RunEnv.Dev and job_type.is_passed_model_in_env_in_dev()):
            model_code = os.environ["JOB_ARG_MODEL_CODE"]

        settings_code = None
        if self.run_env == RunEnv.Prod and job_type == JobType.RealtimeScoring:
            settings_code = os.environ["JOB_ARG_SETTINGS_CODE"]

        self.job = JobConfig(
            augur_code=augur_code,
            job_type=job_type,
            job_code=job_code,
            model_code=model_code,
            settings_code=settings_code,
            realtime_server_host=realtime_server_host,
            realtime_server_port=realtime_server_port,
        )

        # Default the optional API configs to None so that attribute access
        # (and the dataclass-generated __repr__/__eq__) does not raise
        # AttributeError when they are not configured.
        self.dashboard = None
        self.data_man = None
        if self.run_env == RunEnv.Prod:
            self.dashboard = DashboardConfig(os.environ["DASHBOARD_API_HOST"], os.environ["DASHBOARD_API_PORT"])
            # NOTE(review): nesting reconstructed - the flattened source does
            # not show whether this assignment is prod-only or unconditional.
            self.data_man = _data_man_config()

        # In Code Capsule run jobs this is actually added by the orchestration, but it's also just hardcoded there.
        # Seems pointless to add it to the augur jobs too, just to satisfy the way the CredentialUtils is written
        # NOTE(review): nesting reconstructed - this may belong inside the
        # prod branch above; confirm against the upstream file.
        os.environ["CLIENT_ID"] = "altasigma-frontend"
        if self.run_env == RunEnv.Dev:
            # In the workbench we need to override some things to make it fit with the CredentialsUtils,
            # which otherwise would assume the Code Capsule + Device Auth Flow.
            # Write the refresh token into the tmp file, because we neither do the device flow,
            # nor have the mounted secret with a refresh token.
            refresh_token = os.environ.get("AS_TOKEN")
            if refresh_token is None:
                logger.warning("Environment variable AS_TOKEN is not set; no refresh token available.")
            _credential_utils()._write_tokens_to_file("", refresh_token)

            # Fix client secret for non-device auth flow. A missing variable
            # is the expected case, so remove it without raising or warning.
            os.environ.pop('CLIENT_SECRET', None)

        # NOTE(review): nesting reconstructed - this call may belong inside
        # the dev branch above; confirm against the upstream file.
        TokenRefresher().schedule_refresh()
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
module_config = None
|
|
280
|
+
data_man_config = None
|
|
281
|
+
|
|
282
|
+
def _module_config():
    """Get or create the module configuration.

    The instance is created on the first call by constructing ModuleConfig
    and is cached in the module-level ``module_config`` variable; later calls
    return the cached instance.

    Returns:
        ModuleConfig: The module configuration instance.
    """
    global module_config
    if module_config is None:
        module_config = ModuleConfig()
    return module_config
|
|
294
|
+
|
|
295
|
+
def _data_man_config():
    """Get or create the data management configuration.

    The instance is built on the first call from the DATA_MAN_API_HOST and
    DATA_MAN_API_PORT environment variables and cached in the module-level
    ``data_man_config`` variable; later calls return the cached instance.

    Returns:
        DataManConfig: The data management configuration instance.

    Raises:
        KeyError: If either environment variable is missing on first use.
    """
    global data_man_config
    if data_man_config is None:
        data_man_config = DataManConfig(os.environ["DATA_MAN_API_HOST"], os.environ["DATA_MAN_API_PORT"])
    return data_man_config
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""
|
|
2
|
+
HTTP Session Configuration Module
|
|
3
|
+
|
|
4
|
+
This module provides a centralized HTTP session with configurable SSL verification
|
|
5
|
+
for all HTTP requests throughout the AltaSigma package.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
# Global session instance
|
|
12
|
+
http_session = None
|
|
13
|
+
|
|
14
|
+
def _http_session():
    """Get or create the configured HTTP session.

    Creates a requests session with SSL verification settings based on
    the DISABLE_SSL_VERIFICATION environment variable. When
    DISABLE_SSL_VERIFICATION=true, SSL certificate verification is disabled
    and urllib3's InsecureRequestWarning is silenced.

    The session is cached in the module-level ``http_session`` variable, so
    the environment is only consulted on the first call.

    Note: The requests library automatically uses the REQUESTS_CA_BUNDLE
    environment variable for custom CA certificates when verification is
    enabled.

    Returns:
        requests.Session: A configured requests session.
    """
    global http_session
    if http_session is not None:
        return http_session

    # Configure a local session first and publish it only when it is fully
    # set up, so a failure here cannot leave a half-configured session cached.
    session = requests.Session()
    if not _get_ssl_config()['verify']:
        # Disable SSL verification
        session.verify = False
        # Disable SSL warnings when verification is disabled
        import urllib3
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    # else: requests automatically uses REQUESTS_CA_BUNDLE if set

    http_session = session
    return http_session
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _get_ssl_config():
|
|
48
|
+
"""Get SSL configuration settings for use across different HTTP clients.
|
|
49
|
+
|
|
50
|
+
Returns a dictionary with SSL configuration that can be used by
|
|
51
|
+
boto3, requests, and other HTTP clients.
|
|
52
|
+
|
|
53
|
+
Consumers should check the 'verify' boolean first. If verification is not
|
|
54
|
+
disabled and 'ca_bundle' is provided, use the ca_bundle path.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
dict: SSL configuration with keys:
|
|
58
|
+
- 'verify': Boolean indicating whether to verify SSL certificates
|
|
59
|
+
- 'ca_bundle': Path to CA certificate bundle if set via REQUESTS_CA_BUNDLE, or None
|
|
60
|
+
"""
|
|
61
|
+
disable_ssl_verification = os.environ.get('DISABLE_SSL_VERIFICATION', 'false').lower() == 'true'
|
|
62
|
+
ca_bundle = os.environ.get('REQUESTS_CA_BUNDLE')
|
|
63
|
+
|
|
64
|
+
return {
|
|
65
|
+
'verify': not disable_ssl_verification,
|
|
66
|
+
'ca_bundle': ca_bundle
|
|
67
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Credential utilities for managing authentication and secure data source access, for example in the Workbench and in Code Capsules.
|
|
3
|
+
|
|
4
|
+
This module provides tools for obtaining, refreshing, and managing authentication
|
|
5
|
+
credentials required to access various data sources within the AltaSigma ecosystem.
|
|
6
|
+
It supports both interactive authentication flows for notebooks and automated
|
|
7
|
+
authentication for production environments.
|
|
8
|
+
"""
|