rgwfuncs 0.0.113__py3-none-any.whl → 0.0.116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/__init__.py +1 -1
- rgwfuncs/df_lib.py +126 -0
- {rgwfuncs-0.0.113.dist-info → rgwfuncs-0.0.116.dist-info}/METADATA +2 -3
- rgwfuncs-0.0.116.dist-info/RECORD +10 -0
- rgwfuncs-0.0.113.dist-info/RECORD +0 -11
- rgwfuncs-0.0.113.dist-info/entry_points.txt +0 -2
- {rgwfuncs-0.0.113.dist-info → rgwfuncs-0.0.116.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.113.dist-info → rgwfuncs-0.0.116.dist-info}/licenses/LICENSE +0 -0
- {rgwfuncs-0.0.113.dist-info → rgwfuncs-0.0.116.dist-info}/top_level.txt +0 -0
rgwfuncs/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# This file is automatically generated
|
2
2
|
# Dynamically importing functions from modules
|
3
3
|
|
4
|
-
from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_aws_athena_query, load_data_from_big_query, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, load_fresh_data_or_pull_from_cache, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
|
4
|
+
from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_aws_athena_query, load_data_from_big_query, load_data_from_path, load_data_from_query, load_data_from_snowflake, load_data_from_sqlite_path, load_fresh_data_or_pull_from_cache, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
|
5
5
|
from .interactive_shell_lib import interactive_shell
|
6
6
|
from .docs_lib import docs
|
7
7
|
from .str_lib import heading, send_telegram_message, sub_heading, title
|
rgwfuncs/df_lib.py
CHANGED
@@ -20,6 +20,9 @@ from email.mime.text import MIMEText
|
|
20
20
|
from email.mime.base import MIMEBase
|
21
21
|
from email import encoders
|
22
22
|
from googleapiclient.discovery import build
|
23
|
+
import snowflake.connector
|
24
|
+
from cryptography.hazmat.primitives import serialization
|
25
|
+
from cryptography.hazmat.backends import default_backend
|
23
26
|
import base64
|
24
27
|
import boto3
|
25
28
|
from typing import Optional, Dict, List, Tuple, Any, Callable, Union
|
@@ -563,6 +566,129 @@ def load_data_from_big_query(
|
|
563
566
|
return pd.DataFrame(rows, columns=columns)
|
564
567
|
|
565
568
|
|
569
|
+
def load_data_from_snowflake(
    query: str,
    private_key_path: Optional[str] = None,
    private_key_password: Optional[str] = None,
    account: Optional[str] = None,
    user: Optional[str] = None,
    warehouse: Optional[str] = None,
    database: Optional[str] = None,
    schema: Optional[str] = None,
    preset: Optional[str] = None
) -> pd.DataFrame:
    """
    Load data from Snowflake with a query, returning a DataFrame.

    Authentication uses key-pair auth. Credentials come either from a named
    preset in a '.rgwfuncsrc' JSON file or from the direct keyword arguments,
    never from both.

    Parameters:
        query (str): The SQL query to execute.
        private_key_path (Optional[str]): Path to the private key file (PEM).
        private_key_password (Optional[str]): Password for the encrypted private key (if any).
        account (Optional[str]): Snowflake account identifier.
        user (Optional[str]): Snowflake username.
        warehouse (Optional[str]): Snowflake warehouse name.
        database (Optional[str]): Snowflake database name.
        schema (Optional[str]): Snowflake schema name.
        preset (Optional[str]): Name of the Snowflake preset in the .rgwfuncsrc file.

    Returns:
        pd.DataFrame: DataFrame with the query results.

    Raises:
        ValueError: If preset and direct creds are mixed, neither is provided,
            required creds (private_key_path, account, user) are missing,
            the preset is not a Snowflake preset, or the config file is
            empty / not valid JSON.
        FileNotFoundError: If no '.rgwfuncsrc' is found for the preset, or the
            private key file does not exist.
        RuntimeError: If the preset is not found, or connecting / running the
            query fails (the original error is chained as the cause).
    """
    def get_config() -> dict:
        """Find '.rgwfuncsrc' in the working directory or an ancestor and parse it as JSON."""
        current_dir = os.getcwd()
        while True:
            config_path = os.path.join(current_dir, '.rgwfuncsrc')
            if os.path.isfile(config_path):
                with open(config_path, 'r', encoding='utf-8') as f:
                    content = f.read().strip()
                if not content:
                    raise ValueError(f"Empty config at {config_path}")
                try:
                    return json.loads(content)
                except json.JSONDecodeError as e:
                    raise ValueError(f"JSON crap in {config_path}: {e}") from e
            parent = os.path.dirname(current_dir)
            if parent == current_dir:
                # Reached the filesystem root without finding a config file.
                raise FileNotFoundError("Can't find '.rgwfuncsrc' anywhere useful")
            current_dir = parent

    def get_db_preset(config: dict, preset_name: str) -> dict:
        """Return the entry of config['db_presets'] whose 'name' matches, or raise."""
        for candidate in config.get('db_presets', []):
            if candidate.get('name') == preset_name:
                return candidate
        raise RuntimeError(f"Preset '{preset_name}' missing, go add it")

    # Input validation: exactly one credential source must be used.
    direct_creds = (
        private_key_path, private_key_password, account, user,
        warehouse, database, schema,
    )
    has_direct = any(value is not None for value in direct_creds)
    if preset and has_direct:
        raise ValueError("Don't mix preset with direct params, pick one")
    if not preset:
        if not has_direct:
            raise ValueError("Provide preset or direct creds")
        # Direct mode: these three are the minimum needed for key-pair auth.
        if not private_key_path or not account or not user:
            raise ValueError("Need private_key_path, account, and user if no preset")

    # Load creds from preset
    if preset:
        creds = get_db_preset(get_config(), preset)
        if creds.get('db_type') != 'snowflake':
            raise ValueError(f"Preset '{preset}' isn't for Snowflake")
        private_key_path = creds.get('private_key_path')
        private_key_password = creds.get('private_key_password')
        account = creds.get('account')
        user = creds.get('user')
        warehouse = creds.get('warehouse')
        database = creds.get('database')
        schema = creds.get('schema')
        if not private_key_path or not account or not user:
            raise ValueError(f"Missing key creds in preset '{preset}'")

    # Load the PEM private key from disk.
    if not os.path.isfile(private_key_path):
        raise FileNotFoundError(f"Private key missing at {private_key_path}")
    with open(private_key_path, "rb") as key_file:
        pem_bytes = key_file.read()
    private_key = serialization.load_pem_private_key(
        pem_bytes,
        password=private_key_password.encode() if private_key_password else None,
        backend=default_backend()
    )
    # Hand the connector unencrypted DER/PKCS8 bytes, the form shown in
    # Snowflake's key-pair authentication documentation.
    private_key_der = private_key.private_bytes(
        encoding=serialization.Encoding.DER,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption()
    )

    connect_kwargs = {
        'account': account,
        'user': user,
        'private_key': private_key_der,
    }
    # Optional session context: only pass what was actually supplied.
    for key, value in (('warehouse', warehouse), ('database', database), ('schema', schema)):
        if value:
            connect_kwargs[key] = value

    # Connect and query; cursor and connection are always released, even when
    # the query fails.
    try:
        conn = snowflake.connector.connect(**connect_kwargs)
        try:
            cur = conn.cursor()
            try:
                results = cur.execute(query).fetchall()
                columns = [desc[0] for desc in cur.description]
            finally:
                cur.close()
        finally:
            conn.close()
        return pd.DataFrame(results, columns=columns)
    except Exception as e:
        raise RuntimeError(f"Query bombed: {e}") from e
|
690
|
+
|
691
|
+
|
566
692
|
def load_data_from_aws_athena_query(
|
567
693
|
query: str,
|
568
694
|
aws_region: Optional[str] = None,
|
@@ -1,9 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.113
|
3
|
+
Version: 0.0.116
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
|
-
Home-page: https://github.com/ryangerardwilson/rgwfuncs
|
6
|
-
Author: Ryan Gerard Wilson
|
7
5
|
Author-email: Ryan Gerard Wilson <ryangerardwilson@gmail.com>
|
8
6
|
Project-URL: Homepage, https://github.com/ryangerardwilson/rgwfuncs
|
9
7
|
Project-URL: Issues, https://github.com/ryangerardwilson/rgwfuncs
|
@@ -25,6 +23,7 @@ Requires-Dist: slack-sdk
|
|
25
23
|
Requires-Dist: google-api-python-client
|
26
24
|
Requires-Dist: boto3
|
27
25
|
Requires-Dist: pyfiglet
|
26
|
+
Requires-Dist: snowflake-connector-python
|
28
27
|
Dynamic: license-file
|
29
28
|
|
30
29
|
# RGWFUNCS
|
@@ -0,0 +1,10 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=G0i_r5bBDuhGgKCCGaSs0sz0_ZdBicaYRtjZLyR-ntY,1380
|
2
|
+
rgwfuncs/df_lib.py,sha256=RvCBf-YU9LaM-Ilzt60e9JB-AMcFyS5PH9w6E_7pmuw,90516
|
3
|
+
rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
|
4
|
+
rgwfuncs/interactive_shell_lib.py,sha256=YeJBW9YgH5Nv77ONdOyIKFgtf0ItXStdlKGN9GGf8bU,4228
|
5
|
+
rgwfuncs/str_lib.py,sha256=vQ4CYzSLYDIWh4WM1Kjhbg6DqbX4rX6VKdJ_EBJJVyE,11322
|
6
|
+
rgwfuncs-0.0.116.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
7
|
+
rgwfuncs-0.0.116.dist-info/METADATA,sha256=8Pa7P34dybjZqBmUc5ZCMn6id264sdaqSm_mf2TxUBk,42931
|
8
|
+
rgwfuncs-0.0.116.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
9
|
+
rgwfuncs-0.0.116.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
10
|
+
rgwfuncs-0.0.116.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=WafmLtqJRnQ7LWU7Son0inje75tF-m6qHJqrmCgiM84,1354
|
2
|
-
rgwfuncs/df_lib.py,sha256=lcKrMj8IpxWxnWkBFekWBQd9-Ed2nXTsHo57gTeATjE,85153
|
3
|
-
rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
|
4
|
-
rgwfuncs/interactive_shell_lib.py,sha256=YeJBW9YgH5Nv77ONdOyIKFgtf0ItXStdlKGN9GGf8bU,4228
|
5
|
-
rgwfuncs/str_lib.py,sha256=vQ4CYzSLYDIWh4WM1Kjhbg6DqbX4rX6VKdJ_EBJJVyE,11322
|
6
|
-
rgwfuncs-0.0.113.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
7
|
-
rgwfuncs-0.0.113.dist-info/METADATA,sha256=a7_g8iP1CxnhsQJFe7kmnYa6eIahl0jiRLlq4B7B-Ww,42972
|
8
|
-
rgwfuncs-0.0.113.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
9
|
-
rgwfuncs-0.0.113.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
10
|
-
rgwfuncs-0.0.113.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
11
|
-
rgwfuncs-0.0.113.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|