rgwfuncs 0.0.66__tar.gz → 0.0.68__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.66
3
+ Version: 0.0.68
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rgwfuncs"
7
- version = "0.0.66"
7
+ version = "0.0.68"
8
8
  authors = [
9
9
  { name = "Ryan Gerard Wilson", email = "ryangerardwilson@gmail.com" },
10
10
  ]
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = rgwfuncs
3
- version = 0.0.66
3
+ version = 0.0.68
4
4
  author = Ryan Gerard Wilson
5
5
  author_email = ryangerardwilson@gmail.com
6
6
  description = A functional programming paradigm for mathematical modelling and data science
@@ -2,7 +2,7 @@
2
2
  # Dynamically importing functions from modules
3
3
 
4
4
  from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, load_fresh_data_or_pull_from_cache, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
5
- from .interactive_shell_lib import interactive_shell
5
+ from .interactive_shell_lib import interactive_shell, setup_readline
6
6
  from .algebra_lib import cancel_polynomial_expression, compute_constant_expression, compute_constant_expression_involving_matrices, compute_constant_expression_involving_ordered_series, compute_prime_factors, expand_polynomial_expression, factor_polynomial_expression, plot_polynomial_functions, plot_x_points_of_polynomial_functions, python_polynomial_expression_to_latex, simplify_polynomial_expression, solve_homogeneous_polynomial_expression
7
7
  from .docs_lib import docs
8
8
  from .str_lib import send_telegram_message
@@ -509,6 +509,10 @@ def load_data_from_path(file_path: str) -> pd.DataFrame:
509
509
  # Ensure the file path is absolute
510
510
  file_path = os.path.abspath(file_path)
511
511
 
512
+ # Check if the file exists
513
+ if not os.path.isfile(file_path):
514
+ raise ValueError(f"File not found: {file_path}")
515
+
512
516
  # Determine file type by extension
513
517
  file_extension = file_path.split('.')[-1].lower()
514
518
 
@@ -518,6 +522,8 @@ def load_data_from_path(file_path: str) -> pd.DataFrame:
518
522
  df.replace('', None, inplace=True)
519
523
  elif file_extension in ['xls', 'xlsx']:
520
524
  df = pd.read_excel(file_path)
525
+ elif file_extension == 'ods':
526
+ df = pd.read_excel(file_path, engine='odf')
521
527
  elif file_extension == 'json':
522
528
  df = pd.read_json(file_path)
523
529
  elif file_extension == 'parquet':
@@ -2013,24 +2019,24 @@ def sync_dataframe_to_sqlite_database(
2013
2019
  def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
2014
2020
  """
2015
2021
  Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
2016
-
2022
+
2017
2023
  This function checks a specified directory for the most recent cache file matching a specified prefix.
2018
2024
  If a recent cache file (within the cutoff time in hours) is found, the data is read from there.
2019
2025
  Otherwise, it calls the data-fetching function, saves the newly fetched data to a new cache file, and returns it.
2020
2026
 
2021
2027
  Parameters:
2022
- - fetch_func (typing.Callable[[], pd.DataFrame]):
2028
+ - fetch_func (typing.Callable[[], pd.DataFrame]):
2023
2029
  A callable function that, when executed, returns a pandas DataFrame with fresh data.
2024
- - cache_dir (str):
2030
+ - cache_dir (str):
2025
2031
  The directory where cache files are stored.
2026
- - file_prefix (str):
2032
+ - file_prefix (str):
2027
2033
  The prefix used for cache filenames to identify relevant cache files.
2028
- - cache_cutoff_hours (int):
2034
+ - cache_cutoff_hours (int):
2029
2035
  The maximum age of a cache file (in hours) to be considered valid.
2030
2036
  If no file is fresh enough, fresh data will be fetched.
2031
2037
 
2032
2038
  Returns:
2033
- - pd.DataFrame:
2039
+ - pd.DataFrame:
2034
2040
  The pandas DataFrame containing either cached or freshly fetched data.
2035
2041
  """
2036
2042
 
@@ -2047,7 +2053,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2047
2053
  # Retrieve the latest cache file if it exists
2048
2054
  for filename in os.listdir(cache_dir):
2049
2055
  if filename.startswith(file_prefix) and filename.endswith(".csv"):
2050
- timestamp_str: str = filename[len(file_prefix)+1:].replace('.csv', '')
2056
+ timestamp_str: str = filename[len(file_prefix) + 1:].replace('.csv', '')
2051
2057
  try:
2052
2058
  file_time: datetime = datetime.strptime(timestamp_str, '%Y%m%d%H%M%S')
2053
2059
  if latest_cache_time is None or file_time > latest_cache_time:
@@ -2069,4 +2075,3 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2069
2075
  df.to_csv(os.path.join(cache_dir, cache_filename), index=False)
2070
2076
 
2071
2077
  return df
2072
-
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env python3
2
+ import code
3
+ import readline
4
+ import rlcompleter # noqa: F401
5
+ import sys # noqa: F401
6
+ import os
7
+ import atexit
8
+ from typing import Dict, Any
9
+ from .df_lib import * # noqa: F401, F403, E402
10
+ from .algebra_lib import * # noqa: F401, F403, E402
11
+ from .str_lib import * # noqa: F401, F403, E402
12
+ from .docs_lib import * # noqa: F401, F403, E402
13
+
14
+ # File for command history
15
+ HISTORY_FILE = os.path.expanduser("~/.rgwfuncs_shell_history")
16
+
17
def setup_readline():
    """
    Configure readline for the interactive shell: tab completion plus command
    history persisted across sessions in HISTORY_FILE.

    Safe to call more than once per process. The history file is loaded and the
    exit-time save hook is registered only on the first call, so repeated calls
    neither duplicate the in-memory history nor stack up atexit handlers.

    History persistence is best-effort: failures to read or write the history
    file are printed as warnings and never raised.
    """
    readline.set_history_length(1000)  # Limit history to 1000 lines

    # libedit-backed readline builds do not understand the GNU "tab: complete"
    # binding and need their own syntax (same check the stdlib `site` module uses).
    if "libedit" in (readline.__doc__ or ""):
        readline.parse_and_bind("bind ^I rl_complete")
    else:
        readline.parse_and_bind("tab: complete")

    # Loading history and registering the save hook must happen only once;
    # a second read_history_file() would append the file's entries again.
    if getattr(setup_readline, "_history_initialised", False):
        return
    setup_readline._history_initialised = True

    try:
        readline.read_history_file(HISTORY_FILE)
    except FileNotFoundError:
        pass  # No history yet (first run); nothing to load
    except Exception as e:
        print(f"Warning: Could not load history file: {e}")

    def _save_history():
        # Runs at interpreter exit; never let a write failure surface as a traceback.
        try:
            readline.write_history_file(HISTORY_FILE)
        except OSError as e:
            print(f"Warning: Could not save history file: {e}")

    atexit.register(_save_history)
27
+
28
def interactive_shell(local_vars: Dict[str, Any]) -> None:
    """
    Launches an interactive prompt for inspecting and modifying local variables, making all methods
    in the rgwfuncs library available by default. Persists command history across sessions.

    The names defined in this module (including everything star-imported from the
    rgwfuncs libraries) are added to `local_vars` only where the caller has not
    already defined that name, so the caller's own variables are never overwritten.
    `local_vars` is used directly as the shell's namespace, so assignments made in
    the shell are visible in that dictionary afterwards.

    Parameters:
        local_vars (dict): Dictionary of local variables to be available in the interactive shell.

    Raises:
        TypeError: If `local_vars` is not a dictionary.
    """
    if not isinstance(local_vars, dict):
        raise TypeError("local_vars must be a dictionary")

    # Set up readline for history and completion
    setup_readline()

    # Make imported functions available in the REPL without clobbering the
    # caller's own variables that happen to share a name with a library export.
    for name, value in globals().items():
        local_vars.setdefault(name, value)

    # Complete against the shell's namespace; rlcompleter's default completer
    # looks at __main__ and would not see the variables passed in here.
    previous_completer = readline.get_completer()
    readline.set_completer(rlcompleter.Completer(local_vars).complete)

    # Create interactive console with local context
    console = code.InteractiveConsole(locals=local_vars)

    # Start interactive session with a custom banner
    banner = (
        "Welcome to the rgwfuncs interactive shell.\n"
        "Use up/down arrows for command history.\n"
        "Type 'exit()' or Ctrl+D to quit."
    )
    try:
        console.interact(banner=banner, exitmsg="Goodbye.")
    finally:
        # Restore whatever completer the host process had installed
        readline.set_completer(previous_completer)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.66
3
+ Version: 0.0.68
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1,32 +0,0 @@
1
- import code
2
- import readline
3
- import rlcompleter # noqa: F401
4
- import sys # noqa: F401
5
- from typing import Dict, Any
6
- from .df_lib import * # noqa: F401, F403, E402
7
- from .algebra_lib import * # noqa: F401, F403, E402
8
- from .str_lib import * # noqa: F401, F403, E402
9
- from .docs_lib import * # noqa: F401, F403, E402
10
-
11
-
12
- def interactive_shell(local_vars: Dict[str, Any]) -> None:
13
- """
14
- Launches an interactive prompt for inspecting and modifying local variables, making all methods
15
- in the rgwfuncs library available by default.
16
-
17
- Parameters:
18
- local_vars (dict): Dictionary of local variables to be available in the interactive shell.
19
- """
20
- if not isinstance(local_vars, dict):
21
- raise TypeError("local_vars must be a dictionary")
22
-
23
- readline.parse_and_bind("tab: complete")
24
-
25
- # Make imported functions available in the REPL
26
- local_vars.update(globals())
27
-
28
- # Create interactive console with local context
29
- console = code.InteractiveConsole(locals=local_vars)
30
-
31
- # Start interactive session
32
- console.interact(banner="Welcome to the rgwfuncs interactive shell.")
File without changes
File without changes