ml-analytics-tools 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. ml_analytics_tools-0.2.0/LICENSE +21 -0
  2. ml_analytics_tools-0.2.0/PKG-INFO +231 -0
  3. ml_analytics_tools-0.2.0/README.md +197 -0
  4. ml_analytics_tools-0.2.0/ml_analytics/__init__.py +53 -0
  5. ml_analytics_tools-0.2.0/ml_analytics/aws_auth.py +169 -0
  6. ml_analytics_tools-0.2.0/ml_analytics/cli.py +58 -0
  7. ml_analytics_tools-0.2.0/ml_analytics/data_connector.py +2615 -0
  8. ml_analytics_tools-0.2.0/ml_analytics/gsheet_connector.py +1646 -0
  9. ml_analytics_tools-0.2.0/ml_analytics/model_manager.py +1208 -0
  10. ml_analytics_tools-0.2.0/ml_analytics/model_tools.py +990 -0
  11. ml_analytics_tools-0.2.0/ml_analytics/s3_connector.py +1381 -0
  12. ml_analytics_tools-0.2.0/ml_analytics/slack_connector.py +637 -0
  13. ml_analytics_tools-0.2.0/ml_analytics/tunnel_manager.py +277 -0
  14. ml_analytics_tools-0.2.0/ml_analytics/utils.py +673 -0
  15. ml_analytics_tools-0.2.0/ml_analytics_tools.egg-info/PKG-INFO +231 -0
  16. ml_analytics_tools-0.2.0/ml_analytics_tools.egg-info/SOURCES.txt +29 -0
  17. ml_analytics_tools-0.2.0/ml_analytics_tools.egg-info/dependency_links.txt +1 -0
  18. ml_analytics_tools-0.2.0/ml_analytics_tools.egg-info/entry_points.txt +4 -0
  19. ml_analytics_tools-0.2.0/ml_analytics_tools.egg-info/requires.txt +25 -0
  20. ml_analytics_tools-0.2.0/ml_analytics_tools.egg-info/top_level.txt +1 -0
  21. ml_analytics_tools-0.2.0/pyproject.toml +147 -0
  22. ml_analytics_tools-0.2.0/setup.cfg +4 -0
  23. ml_analytics_tools-0.2.0/tests/test_aws_auth.py +133 -0
  24. ml_analytics_tools-0.2.0/tests/test_db_s3.py +554 -0
  25. ml_analytics_tools-0.2.0/tests/test_gsheet_connector.py +982 -0
  26. ml_analytics_tools-0.2.0/tests/test_identity_column.py +246 -0
  27. ml_analytics_tools-0.2.0/tests/test_model_manager.py +64 -0
  28. ml_analytics_tools-0.2.0/tests/test_model_tools.py +304 -0
  29. ml_analytics_tools-0.2.0/tests/test_s3_redshift_validation.py +297 -0
  30. ml_analytics_tools-0.2.0/tests/test_tunnel_manager.py +289 -0
  31. ml_analytics_tools-0.2.0/tests/test_utils.py +414 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Sebastian Daza
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,231 @@
1
+ Metadata-Version: 2.4
2
+ Name: ml-analytics-tools
3
+ Version: 0.2.0
4
+ Summary: Tools for ML projects and data management
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: boto3>=1.37.24
9
+ Requires-Dist: catboost>=1.2.8
10
+ Requires-Dist: ddtrace>=3.4.1
11
+ Requires-Dist: dotenv>=0.9.9
12
+ Requires-Dist: duckdb>=1.4.1
13
+ Requires-Dist: google-api-python-client>=2.150.0
14
+ Requires-Dist: google-auth>=2.35.0
15
+ Requires-Dist: google-auth-httplib2>=0.2.0
16
+ Requires-Dist: google-auth-oauthlib>=1.2.0
17
+ Requires-Dist: ipykernel>=6.29.5
18
+ Requires-Dist: lifelines>=0.30.3
19
+ Requires-Dist: mlflow==3.10.1
20
+ Requires-Dist: mlflow[auth]==3.10.1
21
+ Requires-Dist: pip>=25.3
22
+ Requires-Dist: polars==1.30.0
23
+ Requires-Dist: pytest>=8.3.5
24
+ Requires-Dist: pyyaml>=6.0.2
25
+ Requires-Dist: redshift-connector>=2.1.9
26
+ Requires-Dist: ruff>=0.11.4
27
+ Requires-Dist: schedule>=1.2.2
28
+ Requires-Dist: scikit-learn==1.5.2
29
+ Requires-Dist: seaborn>=0.13.2
30
+ Requires-Dist: setuptools>=42.0.0
31
+ Requires-Dist: shap>=0.47.2
32
+ Requires-Dist: slack-sdk>=3.27.0
33
+ Dynamic: license-file
34
+
35
+ # ML Analytics Tools
36
+
37
+ Utilities for common analytics and machine learning workflows: Redshift, S3,
38
+ Google Sheets, Slack, MLflow, model evaluation, and SQL pipelines.
39
+
40
+ The package is intentionally infrastructure-neutral. Buckets, credentials,
41
+ MLflow hosts, and tokens are provided by your environment or by explicit
42
+ arguments.
43
+
44
+ ## What Is Included
45
+
46
+ - `DataConnector`: run Redshift SQL, load SQL files, unload/load data through S3, and create Redshift tables from DataFrames.
47
+ - `S3Connector`: read, write, list, delete, and query S3 data with DuckDB.
48
+ - `GSheet`: read, write, share, and export Google Sheets data.
49
+ - `SlackConnector`: send messages, upload files, and manage simple Slack interactions.
50
+ - `ModelManager`: create MLflow experiments, log models, register versions, manage aliases, and handle permissions.
51
+ - `model_tools`: classification, regression, survival analysis, CatBoost helpers, plotting, and reporting utilities.
52
+ - `utils`: project-root discovery, SQL file loading, logging, credentials, and YAML SQL pipelines.
53
+
54
+ ## Install
55
+
56
+ From PyPI, after a release is available:
57
+
58
+ ```bash
59
+ uv add ml-analytics-tools
60
+ ```
61
+
62
+ Directly from GitHub:
63
+
64
+ ```bash
65
+ uv add git+https://github.com/sdaza/ml-analytics-tools
66
+ ```
67
+
68
+ For local development:
69
+
70
+ ```bash
71
+ uv sync --all-groups
72
+ ```
73
+
74
+ ## Configuration
75
+
76
+ The package loads a `.env` file from the project root when it is imported.
77
+ Only configure the services you use.
78
+
79
+ ```bash
80
+ # Redshift
81
+ BI_REDSHIFT_HOST=redshift-cluster.example.com
82
+ BI_REDSHIFT_DB=analytics
83
+ BI_REDSHIFT_USER=analytics_user
84
+ BI_REDSHIFT_PASSWORD=secret
85
+ BI_REDSHIFT_PORT=5439
86
+
87
+ # S3
88
+ ML_ANALYTICS_S3_BUCKET=my-analytics-bucket
89
+
90
+ # MLflow
91
+ MLFLOW_TRACKING_URI=https://mlflow.example.com
92
+ MLFLOW_TRACKING_USERNAME=user@example.com
93
+ MLFLOW_TRACKING_PASSWORD=secret
94
+
95
+ # Google Sheets
96
+ GSHEET_SPREADSHEET_ID=optional-default-sheet-id
97
+ GOOGLE_CREDENTIALS='{"type":"service_account", ...}'
98
+
99
+ # Slack
100
+ SLACK_BOT_TOKEN=xoxb-your-token
101
+ ```
102
+
103
+ S3 buckets are never hard-coded. Pass `bucket=...` or `s3_bucket=...`, or set
104
+ `ML_ANALYTICS_S3_BUCKET`.
105
+
106
+ ## AWS Authentication
107
+
108
+ Use the CLI helper for AWS SSO:
109
+
110
+ ```bash
111
+ ml-analytics-auth
112
+ ```
113
+
114
+ You can also call it from Python:
115
+
116
+ ```python
117
+ from ml_analytics import ensure_aws_authenticated
118
+
119
+ ensure_aws_authenticated()
120
+ ```
121
+
122
+ See [AWS Authentication](docs/AWS_AUTHENTICATION.md) and
123
+ [CLI Commands](docs/CLI_COMMANDS.md) for details.
124
+
125
+ ## Quick Examples
126
+
127
+ ### Query Redshift
128
+
129
+ ```python
130
+ from ml_analytics import DataConnector
131
+
132
+ dc = DataConnector()
133
+
134
+ df = dc.sql("SELECT * FROM analytics.customer_features LIMIT 100")
135
+ df_polars = dc.sql("queries/features.sql", format="polars", country="es")
136
+ ```
137
+
138
+ ### Create A Redshift Table From A DataFrame
139
+
140
+ ```python
141
+ dc.create_table_from_dataframe(
142
+ df,
143
+ table="model_scores",
144
+ schema="analytics",
145
+ drop_existing_table=True,
146
+ )
147
+ ```
148
+
149
+ ### Work With S3
150
+
151
+ ```python
152
+ from ml_analytics import S3Connector
153
+
154
+ s3 = S3Connector(bucket="my-analytics-bucket", s3_root="projects/churn")
155
+
156
+ s3.save_dataframe(df, directory="outputs", file_name="scores")
157
+
158
+ summary = s3.query(
159
+ """
160
+ SELECT segment, count(*) AS rows
161
+ FROM read_parquet('s3://my-analytics-bucket/projects/churn/outputs/*.parquet')
162
+ GROUP BY segment
163
+ """
164
+ )
165
+ ```
166
+
167
+ ### Read And Write Google Sheets
168
+
169
+ ```python
170
+ from ml_analytics import GSheet
171
+
172
+ gsheet = GSheet(credentials_path="gsheet_credentials.json")
173
+
174
+ df = gsheet.read_sheet(spreadsheet_id="...", sheet_name="Input")
175
+ gsheet.write_sheet(df, spreadsheet_id="...", sheet_name="Results")
176
+ ```
177
+
178
+ ### Log To MLflow
179
+
180
+ ```python
181
+ from ml_analytics import ModelManager
182
+
183
+ manager = ModelManager(model_name="churn-model", user="user@example.com")
184
+
185
+ manager.start_run("training")
186
+ manager.log_metric("auc", 0.91)
187
+ manager.end_run()
188
+ ```
189
+
190
+ ### Send A Slack Message
191
+
192
+ ```python
193
+ from ml_analytics import SlackConnector
194
+
195
+ slack = SlackConnector()
196
+ slack.send_message(channel="#ml-alerts", text="Training finished")
197
+ ```
198
+
199
+ ## Detailed Guides
200
+
201
+ | Guide | Use It For |
202
+ | --- | --- |
203
+ | [AWS Authentication](docs/AWS_AUTHENTICATION.md) | AWS SSO setup and Python helpers |
204
+ | [CLI Commands](docs/CLI_COMMANDS.md) | Available console commands |
205
+ | [Google Sheets](docs/GSHEET_CONNECTOR_USAGE.md) | Sheets setup, sharing, exports, and examples |
206
+ | [Slack](docs/SLACK_CONNECTOR_USAGE.md) | Slack token setup and message/file examples |
207
+ | [Tunnel Manager](docs/TUNNEL_MANAGER.md) | SSH tunnel configuration and CLI usage |
208
+
209
+ ## Development
210
+
211
+ Run the standard checks before opening a PR:
212
+
213
+ ```bash
214
+ uv run ruff check
215
+ uv run pytest
216
+ ```
217
+
218
+ CI runs Ruff and pytest on Python 3.11 and 3.12.
219
+
220
+ ## Releases
221
+
222
+ This repository uses Release Please. Conventional commits on `main` create or
223
+ update a release PR with the next version and changelog. When that PR is merged,
224
+ the release workflow builds the package and publishes it to PyPI through Trusted
225
+ Publishing using the `pypi` GitHub environment.
226
+
227
+ ## Contributing
228
+
229
+ Keep changes small, covered by tests when behavior changes, and free of
230
+ environment-specific defaults. Prefer explicit configuration over hidden
231
+ infrastructure assumptions.
@@ -0,0 +1,197 @@
1
+ # ML Analytics Tools
2
+
3
+ Utilities for common analytics and machine learning workflows: Redshift, S3,
4
+ Google Sheets, Slack, MLflow, model evaluation, and SQL pipelines.
5
+
6
+ The package is intentionally infrastructure-neutral. Buckets, credentials,
7
+ MLflow hosts, and tokens are provided by your environment or by explicit
8
+ arguments.
9
+
10
+ ## What Is Included
11
+
12
+ - `DataConnector`: run Redshift SQL, load SQL files, unload/load data through S3, and create Redshift tables from DataFrames.
13
+ - `S3Connector`: read, write, list, delete, and query S3 data with DuckDB.
14
+ - `GSheet`: read, write, share, and export Google Sheets data.
15
+ - `SlackConnector`: send messages, upload files, and manage simple Slack interactions.
16
+ - `ModelManager`: create MLflow experiments, log models, register versions, manage aliases, and handle permissions.
17
+ - `model_tools`: classification, regression, survival analysis, CatBoost helpers, plotting, and reporting utilities.
18
+ - `utils`: project-root discovery, SQL file loading, logging, credentials, and YAML SQL pipelines.
19
+
20
+ ## Install
21
+
22
+ From PyPI, after a release is available:
23
+
24
+ ```bash
25
+ uv add ml-analytics-tools
26
+ ```
27
+
28
+ Directly from GitHub:
29
+
30
+ ```bash
31
+ uv add git+https://github.com/sdaza/ml-analytics-tools
32
+ ```
33
+
34
+ For local development:
35
+
36
+ ```bash
37
+ uv sync --all-groups
38
+ ```
39
+
40
+ ## Configuration
41
+
42
+ The package loads a `.env` file from the project root when it is imported.
43
+ Only configure the services you use.
44
+
45
+ ```bash
46
+ # Redshift
47
+ BI_REDSHIFT_HOST=redshift-cluster.example.com
48
+ BI_REDSHIFT_DB=analytics
49
+ BI_REDSHIFT_USER=analytics_user
50
+ BI_REDSHIFT_PASSWORD=secret
51
+ BI_REDSHIFT_PORT=5439
52
+
53
+ # S3
54
+ ML_ANALYTICS_S3_BUCKET=my-analytics-bucket
55
+
56
+ # MLflow
57
+ MLFLOW_TRACKING_URI=https://mlflow.example.com
58
+ MLFLOW_TRACKING_USERNAME=user@example.com
59
+ MLFLOW_TRACKING_PASSWORD=secret
60
+
61
+ # Google Sheets
62
+ GSHEET_SPREADSHEET_ID=optional-default-sheet-id
63
+ GOOGLE_CREDENTIALS='{"type":"service_account", ...}'
64
+
65
+ # Slack
66
+ SLACK_BOT_TOKEN=xoxb-your-token
67
+ ```
68
+
69
+ S3 buckets are never hard-coded. Pass `bucket=...` or `s3_bucket=...`, or set
70
+ `ML_ANALYTICS_S3_BUCKET`.
71
+
72
+ ## AWS Authentication
73
+
74
+ Use the CLI helper for AWS SSO:
75
+
76
+ ```bash
77
+ ml-analytics-auth
78
+ ```
79
+
80
+ You can also call it from Python:
81
+
82
+ ```python
83
+ from ml_analytics import ensure_aws_authenticated
84
+
85
+ ensure_aws_authenticated()
86
+ ```
87
+
88
+ See [AWS Authentication](docs/AWS_AUTHENTICATION.md) and
89
+ [CLI Commands](docs/CLI_COMMANDS.md) for details.
90
+
91
+ ## Quick Examples
92
+
93
+ ### Query Redshift
94
+
95
+ ```python
96
+ from ml_analytics import DataConnector
97
+
98
+ dc = DataConnector()
99
+
100
+ df = dc.sql("SELECT * FROM analytics.customer_features LIMIT 100")
101
+ df_polars = dc.sql("queries/features.sql", format="polars", country="es")
102
+ ```
103
+
104
+ ### Create A Redshift Table From A DataFrame
105
+
106
+ ```python
107
+ dc.create_table_from_dataframe(
108
+ df,
109
+ table="model_scores",
110
+ schema="analytics",
111
+ drop_existing_table=True,
112
+ )
113
+ ```
114
+
115
+ ### Work With S3
116
+
117
+ ```python
118
+ from ml_analytics import S3Connector
119
+
120
+ s3 = S3Connector(bucket="my-analytics-bucket", s3_root="projects/churn")
121
+
122
+ s3.save_dataframe(df, directory="outputs", file_name="scores")
123
+
124
+ summary = s3.query(
125
+ """
126
+ SELECT segment, count(*) AS rows
127
+ FROM read_parquet('s3://my-analytics-bucket/projects/churn/outputs/*.parquet')
128
+ GROUP BY segment
129
+ """
130
+ )
131
+ ```
132
+
133
+ ### Read And Write Google Sheets
134
+
135
+ ```python
136
+ from ml_analytics import GSheet
137
+
138
+ gsheet = GSheet(credentials_path="gsheet_credentials.json")
139
+
140
+ df = gsheet.read_sheet(spreadsheet_id="...", sheet_name="Input")
141
+ gsheet.write_sheet(df, spreadsheet_id="...", sheet_name="Results")
142
+ ```
143
+
144
+ ### Log To MLflow
145
+
146
+ ```python
147
+ from ml_analytics import ModelManager
148
+
149
+ manager = ModelManager(model_name="churn-model", user="user@example.com")
150
+
151
+ manager.start_run("training")
152
+ manager.log_metric("auc", 0.91)
153
+ manager.end_run()
154
+ ```
155
+
156
+ ### Send A Slack Message
157
+
158
+ ```python
159
+ from ml_analytics import SlackConnector
160
+
161
+ slack = SlackConnector()
162
+ slack.send_message(channel="#ml-alerts", text="Training finished")
163
+ ```
164
+
165
+ ## Detailed Guides
166
+
167
+ | Guide | Use It For |
168
+ | --- | --- |
169
+ | [AWS Authentication](docs/AWS_AUTHENTICATION.md) | AWS SSO setup and Python helpers |
170
+ | [CLI Commands](docs/CLI_COMMANDS.md) | Available console commands |
171
+ | [Google Sheets](docs/GSHEET_CONNECTOR_USAGE.md) | Sheets setup, sharing, exports, and examples |
172
+ | [Slack](docs/SLACK_CONNECTOR_USAGE.md) | Slack token setup and message/file examples |
173
+ | [Tunnel Manager](docs/TUNNEL_MANAGER.md) | SSH tunnel configuration and CLI usage |
174
+
175
+ ## Development
176
+
177
+ Run the standard checks before opening a PR:
178
+
179
+ ```bash
180
+ uv run ruff check
181
+ uv run pytest
182
+ ```
183
+
184
+ CI runs Ruff and pytest on Python 3.11 and 3.12.
185
+
186
+ ## Releases
187
+
188
+ This repository uses Release Please. Conventional commits on `main` create or
189
+ update a release PR with the next version and changelog. When that PR is merged,
190
+ the release workflow builds the package and publishes it to PyPI through Trusted
191
+ Publishing using the `pypi` GitHub environment.
192
+
193
+ ## Contributing
194
+
195
+ Keep changes small, covered by tests when behavior changes, and free of
196
+ environment-specific defaults. Prefer explicit configuration over hidden
197
+ infrastructure assumptions.
@@ -0,0 +1,53 @@
1
+ """
2
+ ML Analytics Tools Package
3
+ """
4
+
5
+ from dotenv import load_dotenv
6
+
7
+ from .aws_auth import ensure_aws_authenticated, ensure_aws_sso_login
8
+ from .data_connector import DataConnector
9
+ from .gsheet_connector import GSheet
10
+ from .model_manager import ModelManager
11
+ from .s3_connector import S3Connector
12
+ from .slack_connector import SlackConnector
13
+ from .utils import (
14
+ execute_sql_scripts,
15
+ find_project_root,
16
+ get_credential_value,
17
+ get_logger,
18
+ get_sql_files,
19
+ load_sql_query,
20
+ log_and_raise_error,
21
+ )
22
+
23
+ # Automatically load .env file when the package is imported
24
+ logger = get_logger("ml_analytics")
25
+ try:
26
+ project_root = find_project_root()
27
+ env_file = project_root / ".env"
28
+ if env_file.exists():
29
+ if load_dotenv(env_file, override=True):
30
+ logger.info(".env file loaded successfully.")
31
+ else:
32
+ logger.info("Failed to load .env file.")
33
+ else:
34
+ logger.info("No .env file present in project root.")
35
+ except Exception:
36
+ logger.info("No .env file loaded.")
37
+
38
+ __all__ = [
39
+ "DataConnector",
40
+ "ensure_aws_authenticated",
41
+ "ensure_aws_sso_login",
42
+ "execute_sql_scripts",
43
+ "find_project_root",
44
+ "get_credential_value",
45
+ "get_logger",
46
+ "get_sql_files",
47
+ "GSheet",
48
+ "load_sql_query",
49
+ "log_and_raise_error",
50
+ "ModelManager",
51
+ "S3Connector",
52
+ "SlackConnector",
53
+ ]
@@ -0,0 +1,169 @@
1
+ """
2
+ AWS authentication utilities.
3
+ """
4
+
5
+ import subprocess
6
+ import sys
7
+
8
+ from .utils import get_logger
9
+
10
+ logger = get_logger("aws_auth")
11
+
12
+
13
def _do_sso_login(profile: str = None) -> bool:
    """
    Run ``aws sso login`` interactively and report whether it succeeded.

    Parameters
    ----------
    profile : str, optional
        AWS profile name forwarded as ``--profile``. The flag is omitted
        when the value is falsy.

    Returns
    -------
    bool
        True when the AWS CLI exits with status 0. False on a non-zero exit
        status, when the login exceeds the 300 second timeout, when the
        ``aws`` executable cannot be found, or on any other error.
    """
    try:
        logger.info("AWS SSO login required - starting authentication...")
        profile_args = ["--profile", profile] if profile else []
        argv = ["aws", "sso", "login", *profile_args]

        # Send the CLI's stdout to our stderr: the user still sees the login
        # prompts, but they never end up on this process's stdout where a
        # shell `eval` of our output would execute them.
        completed = subprocess.run(argv, stdout=sys.stderr, timeout=300)

        succeeded = completed.returncode == 0
        if succeeded:
            logger.info("✓ AWS SSO login successful")
        else:
            logger.error("✗ AWS SSO login failed")
        return succeeded

    except subprocess.TimeoutExpired:
        logger.error("AWS SSO login timed out")
        return False
    except FileNotFoundError:
        logger.error("AWS CLI not found. Please install AWS CLI.")
        return False
    except Exception as exc:
        logger.error(f"Error during AWS SSO login: {exc}")
        return False
52
+
53
+
54
def ensure_aws_sso_login(profile: str | None = None, force: bool = False) -> bool:
    """
    Ensures AWS SSO is authenticated. If not, prompts user to login.

    The current credentials are probed with ``aws sts get-caller-identity``.
    When that command exits with a non-zero status, the interactive SSO login
    flow is started.

    Parameters
    ----------
    profile : str, optional
        AWS profile name to use. If None, no ``--profile`` flag is passed and
        the AWS CLI picks its default profile.
    force : bool, optional
        If True, skip the cached credential check and force a fresh SSO login.

    Returns
    -------
    bool
        True if valid credentials already exist or the SSO login succeeded.
        False if the login failed, or if the credential check itself timed
        out, could not find the AWS CLI, or raised another error.
    """
    if force:
        return _do_sso_login(profile)

    # Check if already logged in by attempting to get caller identity.
    cmd = ["aws", "sts", "get-caller-identity"]
    if profile:
        cmd.extend(["--profile", profile])

    # Only the probe lives inside the try block: _do_sso_login handles and
    # logs its own failures, so every handler below refers to the check.
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    except subprocess.TimeoutExpired:
        logger.error("AWS credential check timed out")
        return False
    except FileNotFoundError:
        logger.error("AWS CLI not found. Please install AWS CLI.")
        return False
    except Exception as e:
        logger.error(f"Error checking AWS credentials: {e}")
        return False

    if result.returncode == 0:
        # Already authenticated - don't log to reduce noise
        return True

    # Not logged in, attempt SSO login
    return _do_sso_login(profile)
97
+
98
+
99
def ensure_aws_authenticated(sso_profile: str = None, print_exports: bool = False) -> bool:
    """
    Make sure an AWS SSO session is available, logging in if necessary.

    Thin wrapper around ``ensure_aws_sso_login`` that adds start and
    completion log messages.

    Parameters
    ----------
    sso_profile : str, optional
        AWS SSO profile forwarded to ``ensure_aws_sso_login``.
    print_exports : bool, optional
        Ignored. Accepted only so that older CLI calls keep working; no shell
        exports are required.

    Returns
    -------
    bool
        True if AWS SSO authentication succeeded, False otherwise.

    Example
    -------
    >>> from ml_analytics.aws_auth import ensure_aws_authenticated
    >>> ensure_aws_authenticated()
    """
    del print_exports  # accepted for backward compatibility, intentionally unused
    logger.info("Ensuring AWS authentication...")

    authenticated = ensure_aws_sso_login(sso_profile)
    if authenticated:
        logger.info("✓ AWS authentication complete")
        return True
    return False
128
+
129
+
130
def run_uv_command(command: str) -> bool:
    """
    Execute a UV command line and report whether it exited successfully.

    The captured stdout is printed on success; the captured stderr is logged
    and printed on failure.

    Parameters
    ----------
    command : str
        The command line to run (e.g., "uv sync", "uv add package").

    Returns
    -------
    bool
        True when the command exits with status 0. False on a non-zero exit
        status, when it exceeds the 300 second timeout, or on any other error.

    Example
    -------
    >>> from ml_analytics.aws_auth import run_uv_command
    >>> run_uv_command("uv sync")
    """
    try:
        logger.info(f"Running UV command: {command}")
        # NOTE: shell=True hands the string to the shell verbatim, so callers
        # must never pass untrusted input here.
        proc = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=300)

        if proc.returncode != 0:
            logger.error(f"✗ UV command failed: {proc.stderr}")
            if proc.stderr:
                print(proc.stderr)
            return False

        logger.info("✓ UV command completed successfully")
        if proc.stdout:
            print(proc.stdout)
        return True

    except subprocess.TimeoutExpired:
        logger.error(f"UV command timed out: {command}")
        return False
    except Exception as exc:
        logger.error(f"Error running UV command '{command}': {exc}")
        return False