rgwfuncs 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/__init__.py +2 -0
- rgwfuncs/algebra_lib.py +186 -0
- rgwfuncs/docs_lib.py +50 -0
- {rgwfuncs-0.0.23.dist-info → rgwfuncs-0.0.24.dist-info}/METADATA +21 -12
- rgwfuncs-0.0.24.dist-info/RECORD +11 -0
- rgwfuncs-0.0.23.dist-info/RECORD +0 -9
- {rgwfuncs-0.0.23.dist-info → rgwfuncs-0.0.24.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.23.dist-info → rgwfuncs-0.0.24.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.23.dist-info → rgwfuncs-0.0.24.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.23.dist-info → rgwfuncs-0.0.24.dist-info}/top_level.txt +0 -0
rgwfuncs/__init__.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# This file is automatically generated
|
2
2
|
# Dynamically importing functions from modules
|
3
3
|
|
4
|
+
from .algebra_lib import compute_algebraic_expression, get_prime_factors_latex, simplify_algebraic_expression, solve_algebraic_expression
|
4
5
|
from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, df_docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
|
6
|
+
from .docs_lib import docs
|
5
7
|
from .str_lib import send_telegram_message, str_docs
|
rgwfuncs/algebra_lib.py
ADDED
@@ -0,0 +1,186 @@
|
|
1
|
+
import re
|
2
|
+
import math
|
3
|
+
from sympy import symbols, latex, simplify, solve, diff, Expr
|
4
|
+
from sympy.parsing.sympy_parser import parse_expr
|
5
|
+
from typing import Tuple, List, Dict, Optional
|
6
|
+
|
7
|
+
def compute_algebraic_expression(expression: str) -> float:
    """
    Evaluate a numeric Python expression and return the result as a float.

    The expression is evaluated with ``eval``; only the name ``math`` is
    provided, and builtins are disabled.

    Parameters:
        expression: Expression text such as ``"2 + 3 * 4"`` or
            ``"math.sqrt(16)"``.

    Returns:
        The value of the expression converted with ``float()``.

    Raises:
        ValueError: If evaluation or the float conversion fails for any
            reason. The underlying exception is attached as ``__cause__``.
    """
    # SECURITY: eval() with "__builtins__" set to None is NOT a sandbox;
    # attribute access on literals can still reach arbitrary objects.
    # Only pass expressions that come from a trusted source.
    restricted_globals = {"__builtins__": None, "math": math}
    try:
        numeric_result = eval(expression, restricted_globals)

        # float() rejects non-numeric results (e.g. lists, strings that are
        # not numbers); that failure is reported as ValueError as well.
        return float(numeric_result)
    except Exception as e:
        raise ValueError(f"Error computing expression: {e}") from e
|
17
|
+
|
18
|
+
def simplify_algebraic_expression(expression: str) -> str:
    """
    Simplify an algebraic expression and return it as a string.

    Embedded ``np.diff(...)`` and ``math.<func>(...)`` calls are evaluated
    first and replaced by their textual result; the remaining text is parsed
    with sympy and simplified.

    Parameters:
        expression: The expression text. Every identifier found in it is
            treated as a sympy symbol.

    Returns:
        * the processed text unchanged when it is a bracketed list
          (for example the result of ``np.diff([1, 3, 6])``);
        * a LaTeX string when the simplified result still has free symbols;
        * ``str(result)`` otherwise.

    Raises:
        ValueError: If an embedded call cannot be interpreted, or if parsing
            or simplifying the processed expression fails.
    """
    # Imported locally: the module header does not import ``ast`` although
    # the list branch below needs ``ast.literal_eval``.
    import ast

    def split_top_level_args(args_str: str) -> List[str]:
        """Split an argument string on commas that are not nested in (), [] or {}."""
        parts: List[str] = []
        current: List[str] = []
        depth = 0
        for char in args_str:
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
            if char == "," and depth == 0:
                parts.append("".join(current))
                current = []
            else:
                current.append(char)
        parts.append("".join(current))
        return parts

    def recursive_parse_function_call(func_call: str, prefix: str, sym_vars: Dict[str, Expr]) -> Tuple[str, List[Expr]]:
        """Return the function name and parsed arguments of ``prefix.name(args)``."""
        # Match the function name and arguments
        match = re.match(fr'{prefix}\.(\w+)\((.*)\)', func_call, re.DOTALL)
        if not match:
            raise ValueError(f"Invalid function call: {func_call}")

        func_name = match.group(1)
        args_str = match.group(2)

        # A single bracketed list passed to an np function is kept as a
        # Python list rather than being parsed into sympy objects.
        if prefix == 'np' and args_str.startswith("[") and args_str.endswith("]"):
            parsed_args = [ast.literal_eval(args_str.strip())]
        else:
            parsed_args = []
            for arg in split_top_level_args(args_str):
                arg = arg.strip()
                if re.match(r'\w+\.\w+\(', arg):
                    # Recursively evaluate the argument if it's another function call
                    nested_match = re.match(r'\w+\.\w+\(.*\)', arg)
                    if nested_match is None:
                        raise ValueError(f"Invalid function call: {arg}")
                    arg_val = recursive_eval_func(nested_match, sym_vars)
                    parsed_args.append(parse_expr(arg_val, local_dict=sym_vars))
                else:
                    parsed_args.append(parse_expr(arg, local_dict=sym_vars))

        return func_name, parsed_args

    def recursive_eval_func(match: re.Match, sym_vars: Dict[str, Expr]) -> str:
        """Evaluate one matched ``np.``/``math.``/``sym.`` call and return its text form."""
        func_call = match.group(0)

        if func_call.startswith("np."):
            func_name, args = recursive_parse_function_call(func_call, 'np', sym_vars)
            if func_name == 'diff':
                expr = args[0]
                if isinstance(expr, list):
                    # Calculate discrete difference
                    diff_result = [expr[i] - expr[i - 1] for i in range(1, len(expr))]
                    return str(diff_result)
                # Perform symbolic differentiation
                diff_result = diff(expr)
                return str(diff_result)

        if func_call.startswith("math."):
            func_name, args = recursive_parse_function_call(func_call, 'math', sym_vars)
            if hasattr(math, func_name):
                result = getattr(math, func_name)(*args)
                return str(result)

        if func_call.startswith("sym."):
            initial_method_match = re.match(r'(sym\.\w+\([^()]*\))(\.(\w+)\((.*?)\))*', func_call, re.DOTALL)
            if initial_method_match:
                base_expr_str = initial_method_match.group(1)
                base_func_name, base_args = recursive_parse_function_call(base_expr_str, 'sym', sym_vars)
                if base_func_name == 'solve':
                    solutions = solve(base_args[0], base_args[1])

                    method_chain = re.findall(r'\.(\w+)\((.*?)\)', func_call, re.DOTALL)
                    final_solutions = [
                        execute_chained_methods(
                            sol,
                            [(m, [method_args.strip()]) for m, method_args in method_chain],
                            sym_vars,
                        )
                        for sol in solutions
                    ]

                    return "[" + ",".join(latex(simplify(sol)) for sol in final_solutions) + "]"

        # Reached for any call that none of the branches above returned from.
        raise ValueError(f"Unknown function call: {func_call}")

    def execute_chained_methods(sym_expr: Expr, method_chain: List[Tuple[str, List[str]]], sym_vars: Dict[str, Expr]) -> Expr:
        """Apply each ``(method_name, args)`` pair to ``sym_expr`` in order."""
        for method_name, method_args in method_chain:
            method = getattr(sym_expr, method_name, None)
            # Methods the expression does not have are skipped silently.
            if method:
                if method_name == 'subs' and isinstance(method_args[0], dict):
                    kwargs = method_args[0]
                    kwargs = {parse_expr(k, local_dict=sym_vars): parse_expr(v, local_dict=sym_vars) for k, v in kwargs.items()}
                    sym_expr = method(kwargs)
                else:
                    args = [parse_expr(arg.strip(), local_dict=sym_vars) for arg in method_args]
                    sym_expr = method(*args)
        return sym_expr

    variable_names = set(re.findall(r'\b[a-zA-Z]\w*\b', expression))
    sym_vars = {var: symbols(var) for var in variable_names}

    # Only np.diff(...) and math.<func>(...) calls are rewritten at the top
    # level; no pattern for sym.* calls is active here.
    patterns = {
        "numpy_diff_no_brackets": r"np\.diff\([^()]*\)",
        "math_functions": r"math\.\w+\((?:[^()]*(?:\([^()]*\)[^()]*)*)\)",
    }

    function_pattern = '|'.join(patterns.values())

    # Use a lambda function to pass additional arguments
    processed_expression = re.sub(function_pattern, lambda match: recursive_eval_func(match, sym_vars), expression)

    try:
        # A bracketed list (e.g. a discrete difference) is returned verbatim.
        if processed_expression.startswith('[') and processed_expression.endswith(']'):
            return processed_expression

        expr = parse_expr(processed_expression, local_dict=sym_vars)
        final_result = simplify(expr)

        if final_result.free_symbols:
            return latex(final_result)
        return str(final_result)

    except Exception as e:
        raise ValueError(f"Error simplifying expression: {e}") from e
|
138
|
+
|
139
|
+
def solve_algebraic_expression(expression: str, variable: str, subs: Optional[Dict[str, float]] = None) -> str:
    """
    Solve an expression for one variable and return the solutions as LaTeX.

    Parameters:
        expression: Expression text; every identifier in it is treated as a
            sympy symbol. It is passed to ``sympy.solve`` as-is.
        variable: Name of the symbol to solve for.
        subs: Optional mapping of symbol name to value, substituted into each
            solution after solving.

    Returns:
        The solutions joined by ``", "`` and wrapped in
        ``\\left[ ... \\right]``. A solution that still has free symbols is
        rendered with ``latex``; a fully numeric one with ``str``.

    Raises:
        ValueError: If parsing, solving, substituting or rendering fails.
            The underlying exception is attached as ``__cause__``.
    """
    try:
        # Create symbols for the variables in the expression
        variable_symbols = set(re.findall(r'\b[a-zA-Z]\w*\b', expression))
        sym_vars = {var: symbols(var) for var in variable_symbols}

        # Parse the expression and solve it
        expr = parse_expr(expression, local_dict=sym_vars)
        var_symbol = symbols(variable)
        solutions = solve(expr, var_symbol)

        # Apply substitutions if provided
        if subs:
            subs_symbols = {symbols(k): v for k, v in subs.items()}
            solutions = [simplify(sol.subs(subs_symbols)) for sol in solutions]

        # Convert solutions to LaTeX strings if possible
        latex_solutions = [latex(simplify(sol)) if sol.free_symbols else str(sol) for sol in solutions]
        return r"\left[" + ", ".join(latex_solutions) + r"\right]"

    except Exception as e:
        raise ValueError(f"Error solving the expression: {e}") from e
|
163
|
+
|
164
|
+
|
165
|
+
|
166
|
+
def get_prime_factors_latex(n: int) -> str:
    """
    Return the prime factorisation of a positive integer as a LaTeX expression.

    Factors are listed in ascending order and joined with ``\\cdot``; a
    repeated prime is written as ``p^{k}``.

    Parameters:
        n: The positive integer to factorise.

    Returns:
        For example ``"2^{2} \\cdot 3"`` for 12. For ``n == 1`` the result
        is an empty string, because 1 has no prime factors.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Without this guard 0 would never leave the division loop
    # (0 % p == 0 for every p) and negatives have no factorisation here.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n}")

    # (prime, exponent) pairs; trial division visits candidates in
    # ascending order, so the list is already sorted by prime.
    prime_powers = []
    remaining = n
    candidate = 2
    # Integer comparison avoids the float rounding of math.sqrt on large n.
    while candidate * candidate <= remaining:
        exponent = 0
        while remaining % candidate == 0:
            remaining //= candidate
            exponent += 1
        if exponent:
            prime_powers.append((candidate, exponent))
        # After 2, only odd candidates can be prime.
        candidate += 1 if candidate == 2 else 2

    # Whatever is left above 1 is itself a prime factor.
    if remaining > 1:
        prime_powers.append((remaining, 1))

    latex_factors = [
        f"{prime}^{{{exponent}}}" if exponent > 1 else str(prime)
        for prime, exponent in prime_powers
    ]
    return " \\cdot ".join(latex_factors)
|
184
|
+
|
185
|
+
|
186
|
+
|
rgwfuncs/docs_lib.py
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
import os
|
2
|
+
import inspect
|
3
|
+
from typing import Tuple, Optional, Dict, Callable
|
4
|
+
import warnings
|
5
|
+
|
6
|
+
# Suppress all FutureWarnings
|
7
|
+
warnings.filterwarnings("ignore", category=FutureWarning)
|
8
|
+
|
9
|
+
def docs(method_type_filter: Optional[str] = None) -> None:
    """
    Print the functions defined in each module that sits next to this file.

    For every ``.py`` file in this file's directory (except ``__init__.py``),
    in sorted order, a ``# <module>.py`` header is printed followed by the
    names of the functions defined in that module, alphabetically. When
    ``method_type_filter`` is given, the docstring of each matching function
    is printed right after its name.

    Parameters:
        method_type_filter: A function name whose docstring should be shown,
            or ``'*'`` to show the docstrings of all functions. ``None`` (or
            an empty string) prints names only.
    """
    package_dir = os.path.dirname(__file__)

    for entry in sorted(os.listdir(package_dir)):
        # Only sibling Python modules are listed; the package initialiser is skipped.
        if not entry.endswith('.py') or entry == '__init__.py':
            continue

        module_name = os.path.splitext(entry)[0]
        print(f"\n# {module_name}.py")

        # Import the module by its dotted path inside the rgwfuncs package.
        module_path = f"rgwfuncs.{module_name}"
        module = __import__(module_path, fromlist=[module_name])

        # Keep only functions defined in this module, not ones it imported.
        own_functions = {}
        for func_name, func in inspect.getmembers(module, inspect.isfunction):
            if func.__module__ == module_path:
                own_functions[func_name] = func

        for func_name in sorted(own_functions):
            print(f"{func_name}")

            if not method_type_filter:
                continue
            if method_type_filter != '*' and method_type_filter != func_name:
                continue

            docstring: Optional[str] = own_functions[func_name].__doc__
            if docstring:
                print(f"\n{func_name}:\n{docstring}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.24
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -1002,22 +1002,31 @@ Print the cascading frequency of top n values for specified columns.
|
|
1002
1002
|
--------------------------------------------------------------------------------
|
1003
1003
|
|
1004
1004
|
### 37. `print_n_frequency_linear`
|
1005
|
-
Print the linear frequency of top n values for specified columns.
|
1006
1005
|
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1006
|
+
Prints the linear frequency of the top `n` values for specified columns.
|
1007
|
+
|
1008
|
+
#### Parameters:
|
1009
|
+
- **df** (`pd.DataFrame`): The DataFrame to analyze.
|
1010
|
+
- **n** (`int`): The number of top values to print for each column.
|
1011
|
+
- **columns** (`list`): A list of column names to be analyzed.
|
1012
|
+
- **order_by** (`str`): The order of frequency. The available options are:
|
1013
|
+
- `"ASC"`: Sort keys in ascending lexicographical order.
|
1014
|
+
- `"DESC"`: Sort keys in descending lexicographical order.
|
1015
|
+
- `"FREQ_ASC"`: Sort the frequencies in ascending order (least frequent first).
|
1016
|
+
- `"FREQ_DESC"`: Sort the frequencies in descending order (most frequent first).
|
1017
|
+
- `"BY_KEYS_ASC"`: Sort keys in ascending order, numerically if possible, handling special strings like 'NaN' as typical entries.
|
1018
|
+
- `"BY_KEYS_DESC"`: Sort keys in descending order, numerically if possible, handling special strings like 'NaN' as typical entries.
|
1019
|
+
|
1020
|
+
#### Example:
|
1012
1021
|
|
1013
|
-
• Example:
|
1014
|
-
|
1015
1022
|
from rgwfuncs import print_n_frequency_linear
|
1016
1023
|
import pandas as pd
|
1017
1024
|
|
1018
|
-
df = pd.DataFrame({'City': ['NY','LA','NY','SF','LA','LA']})
|
1019
|
-
print_n_frequency_linear(df, 2, 'City', 'FREQ_DESC')
|
1020
|
-
|
1025
|
+
df = pd.DataFrame({'City': ['NY', 'LA', 'NY', 'SF', 'LA', 'LA']})
|
1026
|
+
print_n_frequency_linear(df, 2, ['City'], 'FREQ_DESC')
|
1027
|
+
|
1028
|
+
This example analyzes the `City` column, printing the top 2 most frequent values in descending order of frequency.
|
1029
|
+
|
1021
1030
|
|
1022
1031
|
--------------------------------------------------------------------------------
|
1023
1032
|
|
@@ -0,0 +1,11 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=UrSka0KkoaZfLtODgbEbGvo67-L0LK1-e9waRn2a95g,1334
|
2
|
+
rgwfuncs/algebra_lib.py,sha256=aayZogB2Rp9JAo5kVHpauqX_R346eI_rIuE5QNEMlKM,7789
|
3
|
+
rgwfuncs/df_lib.py,sha256=8KMn4FucI19EFBHUoGOS7R4mo0degg6A6802sjy7BH4,67677
|
4
|
+
rgwfuncs/docs_lib.py,sha256=iZlQMNS52FuiblCI0oXJVznSuCndeG6WqZfsm-Xnd7U,1918
|
5
|
+
rgwfuncs/str_lib.py,sha256=I5B0WOGaLUGaedMG7hqiKnIqV7Jc9h1RYlgOiC_-iGY,3678
|
6
|
+
rgwfuncs-0.0.24.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
7
|
+
rgwfuncs-0.0.24.dist-info/METADATA,sha256=-Nv5cA1xWJNfDSp53_jkaTce5n93hAH7mS7bv2IFWes,35516
|
8
|
+
rgwfuncs-0.0.24.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
9
|
+
rgwfuncs-0.0.24.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
10
|
+
rgwfuncs-0.0.24.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
11
|
+
rgwfuncs-0.0.24.dist-info/RECORD,,
|
rgwfuncs-0.0.23.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=2nrp3c5VmVrKh0Ih6zELL8niH9nAHN0XnObqe-EpxlE,1169
|
2
|
-
rgwfuncs/df_lib.py,sha256=8KMn4FucI19EFBHUoGOS7R4mo0degg6A6802sjy7BH4,67677
|
3
|
-
rgwfuncs/str_lib.py,sha256=I5B0WOGaLUGaedMG7hqiKnIqV7Jc9h1RYlgOiC_-iGY,3678
|
4
|
-
rgwfuncs-0.0.23.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
5
|
-
rgwfuncs-0.0.23.dist-info/METADATA,sha256=_mVsZMv4umMXMW_Q2hBxABMm75pKuvJgMIMBldXxCtk,34680
|
6
|
-
rgwfuncs-0.0.23.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
7
|
-
rgwfuncs-0.0.23.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
8
|
-
rgwfuncs-0.0.23.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
9
|
-
rgwfuncs-0.0.23.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|