rgwfuncs 0.0.108__py3-none-any.whl → 0.0.110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # This file is automatically generated
2
2
  # Dynamically importing functions from modules
3
3
 
4
- from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, load_fresh_data_or_pull_from_cache, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
4
+ from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_aws_athena_query, load_data_from_big_query, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, load_fresh_data_or_pull_from_cache, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
5
5
  from .interactive_shell_lib import interactive_shell
6
6
  from .docs_lib import docs
7
7
  from .str_lib import heading, send_telegram_message, sub_heading, title
rgwfuncs/df_lib.py CHANGED
@@ -310,98 +310,150 @@ def drop_duplicates_retain_last(
310
310
  return df.drop_duplicates(subset=columns_list, keep='last')
311
311
 
312
312
 
313
- def load_data_from_query(db_preset_name: str, query: str, config: Optional[Union[str, dict]] = None) -> pd.DataFrame:
313
+ def load_data_from_query(
314
+ query: str,
315
+ preset: Optional[str] = None,
316
+ db_type: Optional[str] = None,
317
+ host: Optional[str] = None,
318
+ username: Optional[str] = None,
319
+ password: Optional[str] = None,
320
+ database: Optional[str] = None
321
+ ) -> pd.DataFrame:
314
322
  """
315
- Load data from a database query into a DataFrame based on a configuration preset.
323
+ Load data from a database query into a DataFrame using either a preset or provided credentials.
316
324
 
317
325
  Parameters:
318
- db_preset_name: The name of the database preset in the configuration file.
319
- query: The SQL query to execute.
320
- config (Optional[Union[str, dict]], optional): Configuration source. Can be:
321
- - None: Searches for '.rgwfuncsrc' in current directory and upwards
322
- - str: Path to a JSON configuration file
323
- - dict: Direct configuration dictionary
326
+ query (str): The SQL query to execute.
327
+ preset (Optional[str]): The name of the database preset in the .rgwfuncsrc file.
328
+ db_type (Optional[str]): The database type ('mssql', 'mysql', or 'clickhouse').
329
+ host (Optional[str]): Database host.
330
+ username (Optional[str]): Database username.
331
+ password (Optional[str]): Database password.
332
+ database (Optional[str]): Database name.
324
333
 
325
334
  Returns:
326
- A DataFrame containing the query result.
335
+ pd.DataFrame: DataFrame containing the query result.
327
336
 
328
337
  Raises:
329
- FileNotFoundError: If no '.rgwfuncsrc' file is found after traversing all parent directories.
330
- ValueError: If the database preset or db_type is invalid.
338
+ FileNotFoundError: If no '.rgwfuncsrc' file is found when using preset.
339
+ ValueError: If both preset and direct credentials are provided, neither is provided,
340
+ required credentials are missing, or db_type is invalid.
341
+ RuntimeError: If the preset is not found or necessary preset details are missing.
331
342
  """
332
-
333
- def get_config(config: Optional[Union[str, dict]] = None) -> dict:
334
- """Get configuration either from a path, direct dictionary, or by searching upwards."""
335
- def get_config_from_file(config_path: str) -> dict:
336
- """Load configuration from a JSON file."""
337
- with open(config_path, 'r') as file:
338
- return json.load(file)
339
-
343
+ def get_config() -> dict:
344
+ """Get configuration by searching for '.rgwfuncsrc' in current directory and upwards."""
340
345
  def find_config_file() -> str:
341
- """Search for '.rgwfuncsrc' in current directory and upwards."""
342
346
  current_dir = os.getcwd()
343
347
  while True:
344
348
  config_path = os.path.join(current_dir, '.rgwfuncsrc')
345
349
  if os.path.isfile(config_path):
346
350
  return config_path
347
351
  parent_dir = os.path.dirname(current_dir)
348
- if parent_dir == current_dir: # Reached root directory
352
+ if parent_dir == current_dir:
349
353
  raise FileNotFoundError("No '.rgwfuncsrc' file found in current or parent directories")
350
354
  current_dir = parent_dir
351
355
 
352
- # Determine the config to use
353
- if config is None:
354
- # Search for .rgwfuncsrc upwards from current directory
355
- config_path = find_config_file()
356
- return get_config_from_file(config_path)
357
- elif isinstance(config, str):
358
- # If config is a string, treat it as a path and load it
359
- return get_config_from_file(config)
360
- elif isinstance(config, dict):
361
- # If config is already a dict, use it directly
362
- return config
363
- else:
364
- raise ValueError("Config must be either a path string or a dictionary")
365
-
366
- def query_mssql(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
367
- server = db_preset['host']
368
- user = db_preset['username']
369
- password = db_preset['password']
370
- database = db_preset.get('database', '')
356
+ config_path = find_config_file()
357
+ with open(config_path, 'r', encoding='utf-8') as file:
358
+ content = file.read()
359
+ if not content.strip():
360
+ raise ValueError(f"Config file {config_path} is empty")
361
+ try:
362
+ return json.loads(content)
363
+ except json.JSONDecodeError as e:
364
+ raise ValueError(f"Invalid JSON in config file {config_path}: {e}")
365
+
366
+ def get_db_preset(config: dict, preset_name: str) -> dict:
367
+ """Retrieve the database preset from the configuration."""
368
+ db_presets = config.get('db_presets', [])
369
+ for preset in db_presets:
370
+ if preset.get('name') == preset_name:
371
+ return preset
372
+ raise RuntimeError(f"Database preset '{preset_name}' not found in the configuration file")
373
+
374
+ def validate_credentials(
375
+ db_type: str,
376
+ host: Optional[str] = None,
377
+ username: Optional[str] = None,
378
+ password: Optional[str] = None,
379
+ database: Optional[str] = None
380
+ ) -> dict:
381
+ """Validate credentials and return a credentials dictionary."""
382
+ required_fields = {
383
+ 'mssql': ['host', 'username', 'password'],
384
+ 'mysql': ['host', 'username', 'password'],
385
+ 'clickhouse': ['host', 'username', 'password', 'database']
386
+ }
387
+ if db_type not in required_fields:
388
+ raise ValueError(f"Unsupported db_type: {db_type}")
389
+
390
+ credentials = {
391
+ 'db_type': db_type,
392
+ 'host': host,
393
+ 'username': username,
394
+ 'password': password,
395
+ 'database': database
396
+ }
397
+ missing = [field for field in required_fields[db_type] if credentials[field] is None]
398
+ if missing:
399
+ raise ValueError(f"Missing required credentials for {db_type}: {missing}")
400
+ return credentials
401
+
402
+ # Validate input parameters
403
+ all_credentials = [host, username, password, database]
404
+ if preset and any(all_credentials):
405
+ raise ValueError("Cannot specify both preset and direct credentials")
406
+ if not preset and not db_type:
407
+ raise ValueError("Either preset or db_type with credentials must be provided")
408
+
409
+ # Get credentials
410
+ if preset:
411
+ config_dict = get_config()
412
+ credentials = get_db_preset(config_dict, preset)
413
+ db_type = credentials.get('db_type')
414
+ if not db_type:
415
+ raise ValueError(f"Preset '{preset}' does not specify db_type")
416
+ else:
417
+ credentials = validate_credentials(
418
+ db_type=db_type,
419
+ host=host,
420
+ username=username,
421
+ password=password,
422
+ database=database
423
+ )
371
424
 
425
+ # Query functions
426
+ def query_mssql(credentials: dict, query: str) -> pd.DataFrame:
427
+ server = credentials['host']
428
+ user = credentials['username']
429
+ password = credentials['password']
430
+ database = credentials.get('database', '')
372
431
  with pymssql.connect(server=server, user=user, password=password, database=database) as conn:
373
432
  with conn.cursor() as cursor:
374
433
  cursor.execute(query)
375
434
  rows = cursor.fetchall()
376
435
  columns = [desc[0] for desc in cursor.description]
377
-
378
436
  return pd.DataFrame(rows, columns=columns)
379
437
 
380
- def query_mysql(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
381
- host = db_preset['host']
382
- user = db_preset['username']
383
- password = db_preset['password']
384
- database = db_preset.get('database', '')
385
-
438
+ def query_mysql(credentials: dict, query: str) -> pd.DataFrame:
439
+ host = credentials['host']
440
+ user = credentials['username']
441
+ password = credentials['password']
442
+ database = credentials.get('database', '')
386
443
  with mysql.connector.connect(host=host, user=user, password=password, database=database) as conn:
387
444
  with conn.cursor() as cursor:
388
445
  cursor.execute(query)
389
446
  rows = cursor.fetchall()
390
- columns = ([desc[0] for desc in cursor.description]
391
- if cursor.description else [])
392
-
447
+ columns = [desc[0] for desc in cursor.description] if cursor.description else []
393
448
  return pd.DataFrame(rows, columns=columns)
394
449
 
395
- def query_clickhouse(
396
- db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
397
- host = db_preset['host']
398
- user = db_preset['username']
399
- password = db_preset['password']
400
- database = db_preset['database']
401
-
450
+ def query_clickhouse(credentials: dict, query: str) -> pd.DataFrame:
451
+ host = credentials['host']
452
+ user = credentials['username']
453
+ password = credentials['password']
454
+ database = credentials['database']
402
455
  max_retries = 5
403
456
  retry_delay = 5
404
-
405
457
  for attempt in range(max_retries):
406
458
  try:
407
459
  client = clickhouse_connect.get_client(
@@ -411,99 +463,228 @@ def load_data_from_query(db_preset_name: str, query: str, config: Optional[Union
411
463
  columns = data.column_names
412
464
  return pd.DataFrame(rows, columns=columns)
413
465
  except Exception as e:
414
- print(f"Attempt {attempt + 1} failed: {e}")
415
466
  if attempt < max_retries - 1:
416
- print(f"Retrying in {retry_delay} seconds...")
417
467
  time.sleep(retry_delay)
418
468
  else:
419
- raise ConnectionError(
420
- "All attempts to connect to ClickHouse failed.")
421
-
422
- def query_google_big_query(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
423
- json_file_path = db_preset['json_file_path']
424
- project_id = db_preset['project_id']
469
+ raise ConnectionError("All attempts to connect to ClickHouse failed.")
425
470
 
426
- credentials = service_account.Credentials.from_service_account_file(
427
- json_file_path)
428
- client = bigquery.Client(credentials=credentials, project=project_id)
471
+ # Execute query based on db_type
472
+ if db_type == 'mssql':
473
+ return query_mssql(credentials, query)
474
+ elif db_type == 'mysql':
475
+ return query_mysql(credentials, query)
476
+ elif db_type == 'clickhouse':
477
+ return query_clickhouse(credentials, query)
478
+ else:
479
+ raise ValueError(f"Unsupported db_type: {db_type}")
429
480
 
430
- query_job = client.query(query)
431
- results = query_job.result()
432
- rows = [list(row.values()) for row in results]
433
- columns = [field.name for field in results.schema]
434
481
 
435
- return pd.DataFrame(rows, columns=columns)
482
+ def load_data_from_big_query(
483
+ query: str,
484
+ json_file_path: Optional[str] = None,
485
+ project_id: Optional[str] = None,
486
+ preset: Optional[str] = None
487
+ ) -> pd.DataFrame:
488
+ """
489
+ Load data from a Google Big Query query into a DataFrame.
436
490
 
437
- def query_athena(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
491
+ Parameters:
492
+ query (str): The SQL query to execute.
493
+ json_file_path (Optional[str]): Path to the Google Cloud service account JSON file.
494
+ project_id (Optional[str]): Google Cloud project ID.
495
+ preset (Optional[str]): The name of the Big Query preset in the .rgwfuncsrc file.
438
496
 
439
- def execute_athena_query(athena_client, query: str, database: str, output_bucket: str) -> str:
440
- response = athena_client.start_query_execution(
441
- QueryString=query,
442
- QueryExecutionContext={"Database": database},
443
- ResultConfiguration={"OutputLocation": output_bucket}
444
- )
445
- return response["QueryExecutionId"]
497
+ Returns:
498
+ pd.DataFrame: DataFrame containing the query result.
446
499
 
447
- def wait_for_athena_query_to_complete(athena_client, query_execution_id: str):
500
+ Raises:
501
+ ValueError: If both preset and direct credentials are provided, neither is provided,
502
+ or required credentials are missing.
503
+ FileNotFoundError: If no '.rgwfuncsrc' file is found when using preset.
504
+ RuntimeError: If the preset is not found or necessary preset details are missing.
505
+ """
506
+ def get_config() -> dict:
507
+ """Get configuration by searching for '.rgwfuncsrc' in current directory and upwards."""
508
+ def find_config_file() -> str:
509
+ current_dir = os.getcwd()
448
510
  while True:
449
- response = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
450
- state = response["QueryExecution"]["Status"]["State"]
451
- if state == "SUCCEEDED":
452
- break
453
- elif state in ("FAILED", "CANCELLED"):
454
- raise Exception(f"Query failed with state: {state}")
455
- time.sleep(1)
456
-
457
- def download_athena_query_results(athena_client, query_execution_id: str) -> pd.DataFrame:
458
- paginator = athena_client.get_paginator("get_query_results")
459
- result_pages = paginator.paginate(QueryExecutionId=query_execution_id)
460
- rows = []
461
- columns = []
462
- for page in result_pages:
463
- if not columns:
464
- columns = [col["Name"] for col in page["ResultSet"]["ResultSetMetadata"]["ColumnInfo"]]
465
- rows.extend(page["ResultSet"]["Rows"])
466
-
467
- data = [[col.get("VarCharValue", None) for col in row["Data"]] for row in rows[1:]]
468
- return pd.DataFrame(data, columns=columns)
469
-
470
- aws_region = db_preset['aws_region']
471
- database = db_preset['database']
472
- output_bucket = db_preset['output_bucket']
473
-
474
- athena_client = boto3.client(
475
- 'athena',
476
- region_name=aws_region,
477
- aws_access_key_id=db_preset['aws_access_key'],
478
- aws_secret_access_key=db_preset['aws_secret_key']
479
- )
511
+ config_path = os.path.join(current_dir, '.rgwfuncsrc')
512
+ if os.path.isfile(config_path):
513
+ return config_path
514
+ parent_dir = os.path.dirname(current_dir)
515
+ if parent_dir == current_dir:
516
+ raise FileNotFoundError("No '.rgwfuncsrc' file found in current or parent directories")
517
+ current_dir = parent_dir
480
518
 
481
- query_execution_id = execute_athena_query(athena_client, query, database, output_bucket)
482
- wait_for_athena_query_to_complete(athena_client, query_execution_id)
483
- return download_athena_query_results(athena_client, query_execution_id)
519
+ config_path = find_config_file()
520
+ with open(config_path, 'r', encoding='utf-8') as file:
521
+ content = file.read()
522
+ if not content.strip():
523
+ raise ValueError(f"Config file {config_path} is empty")
524
+ try:
525
+ return json.loads(content)
526
+ except json.JSONDecodeError as e:
527
+ raise ValueError(f"Invalid JSON in config file {config_path}: {e}")
528
+
529
+ def get_db_preset(config: dict, preset_name: str) -> dict:
530
+ """Retrieve the database preset from the configuration."""
531
+ db_presets = config.get('db_presets', [])
532
+ for preset in db_presets:
533
+ if preset.get('name') == preset_name:
534
+ return preset
535
+ raise RuntimeError(f"Database preset '{preset_name}' not found in the configuration file")
536
+
537
+ # Validate inputs
538
+ if preset and (json_file_path or project_id):
539
+ raise ValueError("Cannot specify both preset and json_file_path/project_id")
540
+ if not preset and (bool(json_file_path) != bool(project_id)):
541
+ raise ValueError("Both json_file_path and project_id must be provided if preset is not used")
542
+ if not preset and not json_file_path and not project_id:
543
+ raise ValueError("Either preset or both json_file_path and project_id must be provided")
544
+
545
+ # Get credentials
546
+ if preset:
547
+ config_dict = get_config()
548
+ credentials = get_db_preset(config_dict, preset)
549
+ if credentials.get('db_type') != 'google_big_query':
550
+ raise ValueError(f"Preset '{preset}' is not for google_big_query")
551
+ json_file_path = credentials.get('json_file_path')
552
+ project_id = credentials.get('project_id')
553
+ if not json_file_path or not project_id:
554
+ raise ValueError(f"Missing json_file_path or project_id in preset '{preset}'")
555
+
556
+ # Execute query
557
+ credentials_obj = service_account.Credentials.from_service_account_file(json_file_path)
558
+ client = bigquery.Client(credentials=credentials_obj, project=project_id)
559
+ query_job = client.query(query)
560
+ results = query_job.result()
561
+ rows = [list(row.values()) for row in results]
562
+ columns = [field.name for field in results.schema]
563
+ return pd.DataFrame(rows, columns=columns)
564
+
565
+
566
+ def load_data_from_aws_athena_query(
567
+ query: str,
568
+ aws_region: Optional[str] = None,
569
+ database: Optional[str] = None,
570
+ output_bucket: Optional[str] = None,
571
+ aws_access_key: Optional[str] = None,
572
+ aws_secret_key: Optional[str] = None,
573
+ preset: Optional[str] = None
574
+ ) -> pd.DataFrame:
575
+ """
576
+ Load data from an AWS Athena query into a DataFrame.
484
577
 
485
- config = get_config(config)
486
- db_presets = config.get('db_presets', [])
487
- db_preset = next(
488
- (preset for preset in db_presets if preset['name'] == db_preset_name),
489
- None)
490
- if not db_preset:
491
- raise ValueError(f"No matching db_preset found for {db_preset_name}")
578
+ Parameters:
579
+ query (str): The SQL query to execute.
580
+ aws_region (Optional[str]): AWS region for Athena.
581
+ database (Optional[str]): Athena database name.
582
+ output_bucket (Optional[str]): S3 bucket for query results.
583
+ aws_access_key (Optional[str]): AWS access key ID.
584
+ aws_secret_key (Optional[str]): AWS secret access key.
585
+ preset (Optional[str]): The name of the Athena preset in the .rgwfuncsrc file.
492
586
 
493
- db_type = db_preset['db_type']
587
+ Returns:
588
+ pd.DataFrame: DataFrame containing the query result.
494
589
 
495
- if db_type == 'mssql':
496
- return query_mssql(db_preset, query)
497
- elif db_type == 'mysql':
498
- return query_mysql(db_preset, query)
499
- elif db_type == 'clickhouse':
500
- return query_clickhouse(db_preset, query)
501
- elif db_type == 'google_big_query':
502
- return query_google_big_query(db_preset, query)
503
- elif db_type == 'aws_athena':
504
- return query_athena(db_preset, query)
505
- else:
506
- raise ValueError(f"Unsupported db_type: {db_type}")
590
+ Raises:
591
+ ValueError: If both preset and direct credentials are provided, neither is provided,
592
+ or required credentials are missing.
593
+ FileNotFoundError: If no '.rgwfuncsrc' file is found when using preset.
594
+ RuntimeError: If the preset is not found or necessary preset details are missing.
595
+ """
596
+ def get_config() -> dict:
597
+ """Get configuration by searching for '.rgwfuncsrc' in current directory and upwards."""
598
+ def find_config_file() -> str:
599
+ current_dir = os.getcwd()
600
+ while True:
601
+ config_path = os.path.join(current_dir, '.rgwfuncsrc')
602
+ if os.path.isfile(config_path):
603
+ return config_path
604
+ parent_dir = os.path.dirname(current_dir)
605
+ if parent_dir == current_dir:
606
+ raise FileNotFoundError("No '.rgwfuncsrc' file found in current or parent directories")
607
+ current_dir = parent_dir
608
+
609
+ config_path = find_config_file()
610
+ with open(config_path, 'r', encoding='utf-8') as file:
611
+ content = file.read()
612
+ if not content.strip():
613
+ raise ValueError(f"Config file {config_path} is empty")
614
+ try:
615
+ return json.loads(content)
616
+ except json.JSONDecodeError as e:
617
+ raise ValueError(f"Invalid JSON in config file {config_path}: {e}")
618
+
619
+ def get_db_preset(config: dict, preset_name: str) -> dict:
620
+ """Retrieve the database preset from the configuration."""
621
+ db_presets = config.get('db_presets', [])
622
+ for preset in db_presets:
623
+ if preset.get('name') == preset_name:
624
+ return preset
625
+ raise RuntimeError(f"Database preset '{preset_name}' not found in the configuration file")
626
+
627
+ # Validate inputs
628
+ if preset and any([aws_region, database, output_bucket, aws_access_key, aws_secret_key]):
629
+ raise ValueError("Cannot specify both preset and direct Athena credentials")
630
+ required = [aws_region, database, output_bucket, aws_access_key, aws_secret_key]
631
+ if not preset and not all(required):
632
+ raise ValueError("All Athena credentials (aws_region, database, output_bucket, aws_access_key, aws_secret_key) must be provided if preset is not used")
633
+
634
+ # Get credentials
635
+ if preset:
636
+ config_dict = get_config()
637
+ credentials = get_db_preset(config_dict, preset)
638
+ if credentials.get('db_type') != 'aws_athena':
639
+ raise ValueError(f"Preset '{preset}' is not for aws_athena")
640
+ aws_region = credentials.get('aws_region')
641
+ database = credentials.get('database')
642
+ output_bucket = credentials.get('output_bucket')
643
+ aws_access_key = credentials.get('aws_access_key')
644
+ aws_secret_key = credentials.get('aws_secret_key')
645
+ if not all([aws_region, database, output_bucket, aws_access_key, aws_secret_key]):
646
+ raise ValueError(f"Missing required Athena credentials in preset '{preset}'")
647
+
648
+ # Execute query
649
+ def execute_athena_query(athena_client, query: str, database: str, output_bucket: str) -> str:
650
+ response = athena_client.start_query_execution(
651
+ QueryString=query,
652
+ QueryExecutionContext={"Database": database},
653
+ ResultConfiguration={"OutputLocation": output_bucket}
654
+ )
655
+ return response["QueryExecutionId"]
656
+
657
+ def wait_for_athena_query_to_complete(athena_client, query_execution_id: str):
658
+ while True:
659
+ response = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
660
+ state = response["QueryExecution"]["Status"]["State"]
661
+ if state == "SUCCEEDED":
662
+ break
663
+ elif state in ("FAILED", "CANCELLED"):
664
+ raise Exception(f"Query failed with state: {state}")
665
+ time.sleep(1)
666
+
667
+ def download_athena_query_results(athena_client, query_execution_id: str) -> pd.DataFrame:
668
+ paginator = athena_client.get_paginator("get_query_results")
669
+ result_pages = paginator.paginate(QueryExecutionId=query_execution_id)
670
+ rows = []
671
+ columns = []
672
+ for page in result_pages:
673
+ if not columns:
674
+ columns = [col["Name"] for col in page["ResultSet"]["ResultSetMetadata"]["ColumnInfo"]]
675
+ rows.extend(page["ResultSet"]["Rows"])
676
+ data = [[col.get("VarCharValue", None) for col in row["Data"]] for row in rows[1:]]
677
+ return pd.DataFrame(data, columns=columns)
678
+
679
+ athena_client = boto3.client(
680
+ 'athena',
681
+ region_name=aws_region,
682
+ aws_access_key_id=aws_access_key,
683
+ aws_secret_access_key=aws_secret_key
684
+ )
685
+ query_execution_id = execute_athena_query(athena_client, query, database, output_bucket)
686
+ wait_for_athena_query_to_complete(athena_client, query_execution_id)
687
+ return download_athena_query_results(athena_client, query_execution_id)
507
688
 
508
689
 
509
690
  def load_data_from_path(file_path: str) -> pd.DataFrame:
rgwfuncs/str_lib.py CHANGED
@@ -17,66 +17,47 @@ _PRINT_HEADING_CURRENT_CALL = 0
17
17
  _PRINT_SUBHEADING_COUNTS = defaultdict(int) # Tracks sub-headings per heading
18
18
  _CURRENT_HEADING_NUMBER = 0
19
19
 
20
- def send_telegram_message(preset_name: str, message: str, config: Optional[Union[str, dict]] = None) -> None:
20
+
21
+ def send_telegram_message(message: str, preset_name: Optional[str] = None, bot_token: Optional[str] = None,
22
+ chat_id: Optional[str] = None) -> None:
21
23
  """
22
- Send a Telegram message using the specified preset.
24
+ Send a Telegram message using either a preset or provided bot token and chat ID.
23
25
 
24
26
  Args:
25
- preset_name (str): The name of the preset to use for sending the message.
26
27
  message (str): The message to send.
27
- config (Optional[Union[str, dict]], optional): Configuration source. Can be:
28
- - None: Searches for '.rgwfuncsrc' in current directory and upwards
29
- - str: Path to a JSON configuration file
30
- - dict: Direct configuration dictionary
28
+ preset_name (Optional[str]): The name of the preset to use for sending the message.
29
+ bot_token (Optional[str]): The Telegram bot token.
30
+ chat_id (Optional[str]): The Telegram chat ID.
31
31
 
32
32
  Raises:
33
- FileNotFoundError: If no '.rgwfuncsrc' file is found in current or parent directories.
34
- ValueError: If the config parameter is neither a path string nor a dictionary, or if the config file is empty/invalid.
35
- RuntimeError: If the preset is not found or necessary details are missing.
33
+ FileNotFoundError: If no '.rgwfuncsrc' file is found when preset_name is used.
34
+ ValueError: If preset_name is used with bot_token/chat_id, neither preset_name nor both bot_token/chat_id are provided,
35
+ or the config file is empty/invalid.
36
+ RuntimeError: If the preset is not found or necessary preset details are missing.
36
37
  """
37
- def get_config(config: Optional[Union[str, dict]] = None) -> dict:
38
- """Get configuration either from a path, direct dictionary, or by searching upwards."""
39
- def get_config_from_file(config_path: str) -> dict:
40
- """Load configuration from a JSON file."""
41
- # print(f"Reading config from: {config_path}") # Debug line
42
- with open(config_path, 'r', encoding='utf-8') as file:
43
- content = file.read()
44
- # print(f"Config content (first 100 chars): {content[:100]}...") # Debug line
45
- if not content.strip():
46
- raise ValueError(f"Config file {config_path} is empty")
47
- try:
48
- return json.loads(content)
49
- except json.JSONDecodeError as e:
50
- raise ValueError(f"Invalid JSON in config file {config_path}: {e}")
51
-
38
+ def get_config() -> dict:
39
+ """Get configuration by searching for '.rgwfuncsrc' in current directory and upwards."""
52
40
  def find_config_file() -> str:
53
41
  """Search for '.rgwfuncsrc' in current directory and upwards."""
54
42
  current_dir = os.getcwd()
55
- # print(f"Starting config search from: {current_dir}") # Debug line
56
43
  while True:
57
44
  config_path = os.path.join(current_dir, '.rgwfuncsrc')
58
- # print(f"Checking for config at: {config_path}") # Debug line
59
45
  if os.path.isfile(config_path):
60
- # print(f"Found config at: {config_path}") # Debug line
61
46
  return config_path
62
47
  parent_dir = os.path.dirname(current_dir)
63
48
  if parent_dir == current_dir: # Reached root directory
64
49
  raise FileNotFoundError(f"No '.rgwfuncsrc' file found in {os.getcwd()} or parent directories")
65
50
  current_dir = parent_dir
66
51
 
67
- # Determine the config to use
68
- if config is None:
69
- # Search for .rgwfuncsrc upwards from current directory
70
- config_path = find_config_file()
71
- return get_config_from_file(config_path)
72
- elif isinstance(config, str):
73
- # If config is a string, treat it as a path and load it
74
- return get_config_from_file(config)
75
- elif isinstance(config, dict):
76
- # If config is already a dict, use it directly
77
- return config
78
- else:
79
- raise ValueError("Config must be either a path string or a dictionary")
52
+ config_path = find_config_file()
53
+ with open(config_path, 'r', encoding='utf-8') as file:
54
+ content = file.read()
55
+ if not content.strip():
56
+ raise ValueError(f"Config file {config_path} is empty")
57
+ try:
58
+ return json.loads(content)
59
+ except json.JSONDecodeError as e:
60
+ raise ValueError(f"Invalid JSON in config file {config_path}: {e}")
80
61
 
81
62
  def get_telegram_preset(config: dict, preset_name: str) -> dict:
82
63
  """Get the Telegram preset configuration."""
@@ -90,30 +71,36 @@ def send_telegram_message(preset_name: str, message: str, config: Optional[Union
90
71
  """Retrieve the Telegram bot token and chat ID from the preset."""
91
72
  preset = get_telegram_preset(config, preset_name)
92
73
  if not preset:
93
- raise RuntimeError(
94
- f"Telegram bot preset '{preset_name}' not found in the configuration file")
74
+ raise RuntimeError(f"Telegram bot preset '{preset_name}' not found in the configuration file")
95
75
 
96
76
  bot_token = preset.get("bot_token")
97
77
  chat_id = preset.get("chat_id")
98
78
 
99
79
  if not bot_token or not chat_id:
100
- raise RuntimeError(
101
- f"Telegram bot token or chat ID for '{preset_name}' not found in the configuration file")
80
+ raise RuntimeError(f"Telegram bot token or chat ID for '{preset_name}' not found in the configuration file")
102
81
 
103
82
  return bot_token, chat_id
104
83
 
105
- config = get_config(config)
106
- # Get bot details from the configuration
107
- bot_token, chat_id = get_telegram_bot_details(config, preset_name)
84
+ # Validate input parameters
85
+ if preset_name and (bot_token or chat_id):
86
+ raise ValueError("Cannot specify both preset_name and bot_token/chat_id")
87
+ if not preset_name and (bool(bot_token) != bool(chat_id)):
88
+ raise ValueError("Both bot_token and chat_id must be provided if preset_name is not used")
89
+ if not preset_name and not bot_token and not chat_id:
90
+ raise ValueError("Either preset_name or both bot_token and chat_id must be provided")
91
+
92
+ # Get bot token and chat ID
93
+ if preset_name:
94
+ config = get_config()
95
+ bot_token, chat_id = get_telegram_bot_details(config, preset_name)
108
96
 
109
- # Prepare the request
97
+ # Prepare and send the request
110
98
  url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
111
99
  payload = {"chat_id": chat_id, "text": message}
112
-
113
- # Send the message
114
100
  response = requests.post(url, json=payload)
115
101
  response.raise_for_status()
116
102
 
103
+
117
104
  def title(text: str, font: str = "slant", typing_speed: float = 0.005) -> None:
118
105
  """
119
106
  Print text as ASCII art with a typewriter effect using the specified font (default: slant),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rgwfuncs
3
- Version: 0.0.108
3
+ Version: 0.0.110
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfuncs
6
6
  Author: Ryan Gerard Wilson
@@ -0,0 +1,11 @@
1
+ rgwfuncs/__init__.py,sha256=WafmLtqJRnQ7LWU7Son0inje75tF-m6qHJqrmCgiM84,1354
2
+ rgwfuncs/df_lib.py,sha256=RFCgVcx_NphJ4Q7O6tP8rplBcx1qA4xHtNyjS8lHStY,85209
3
+ rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
4
+ rgwfuncs/interactive_shell_lib.py,sha256=YeJBW9YgH5Nv77ONdOyIKFgtf0ItXStdlKGN9GGf8bU,4228
5
+ rgwfuncs/str_lib.py,sha256=rfzRd7bOc0KQ7UxUN-J6yxY23pHEUHqerVR1u-TyIAY,10690
6
+ rgwfuncs-0.0.110.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
7
+ rgwfuncs-0.0.110.dist-info/METADATA,sha256=nhEZlnLfErGyCrSsbBw-k4Ah5HZ49oiJpj1yqpZGD-k,42972
8
+ rgwfuncs-0.0.110.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ rgwfuncs-0.0.110.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
10
+ rgwfuncs-0.0.110.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
11
+ rgwfuncs-0.0.110.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- rgwfuncs/__init__.py,sha256=XIbMHeEimBLNcX2PK7uQGquG-eAYXsetVEJrlBZIH5U,1295
2
- rgwfuncs/df_lib.py,sha256=3foomRHlunCpf_accTcqfdgwDmSPIVkVCnlWJ4ag4XQ,76947
3
- rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
4
- rgwfuncs/interactive_shell_lib.py,sha256=YeJBW9YgH5Nv77ONdOyIKFgtf0ItXStdlKGN9GGf8bU,4228
5
- rgwfuncs/str_lib.py,sha256=xhYCSxWGHyvfaxhBuKfEExvCAhoOpyw2GhnwFy2CEUI,11403
6
- rgwfuncs-0.0.108.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
7
- rgwfuncs-0.0.108.dist-info/METADATA,sha256=6p59qcnYK2oqE-czQyx7Rp2RAWxRcxHxr5o5hzArZ7I,42972
8
- rgwfuncs-0.0.108.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
- rgwfuncs-0.0.108.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
10
- rgwfuncs-0.0.108.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
11
- rgwfuncs-0.0.108.dist-info/RECORD,,