rgwfuncs 0.0.24__py3-none-any.whl → 0.0.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/__init__.py +2 -2
- rgwfuncs/algebra_lib.py +117 -30
- rgwfuncs/df_lib.py +43 -51
- rgwfuncs/docs_lib.py +11 -10
- rgwfuncs/str_lib.py +8 -44
- {rgwfuncs-0.0.24.dist-info → rgwfuncs-0.0.26.dist-info}/METADATA +158 -65
- rgwfuncs-0.0.26.dist-info/RECORD +11 -0
- rgwfuncs-0.0.24.dist-info/RECORD +0 -11
- {rgwfuncs-0.0.24.dist-info → rgwfuncs-0.0.26.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.24.dist-info → rgwfuncs-0.0.26.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.24.dist-info → rgwfuncs-0.0.26.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.24.dist-info → rgwfuncs-0.0.26.dist-info}/top_level.txt +0 -0
rgwfuncs/__init__.py
CHANGED
@@ -2,6 +2,6 @@
|
|
2
2
|
# Dynamically importing functions from modules
|
3
3
|
|
4
4
|
from .algebra_lib import compute_algebraic_expression, get_prime_factors_latex, simplify_algebraic_expression, solve_algebraic_expression
|
5
|
-
from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows,
|
5
|
+
from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
|
6
6
|
from .docs_lib import docs
|
7
|
-
from .str_lib import send_telegram_message
|
7
|
+
from .str_lib import send_telegram_message
|
rgwfuncs/algebra_lib.py
CHANGED
@@ -1,10 +1,29 @@
|
|
1
1
|
import re
|
2
2
|
import math
|
3
|
+
import ast
|
3
4
|
from sympy import symbols, latex, simplify, solve, diff, Expr
|
4
5
|
from sympy.parsing.sympy_parser import parse_expr
|
5
6
|
from typing import Tuple, List, Dict, Optional
|
6
7
|
|
8
|
+
|
7
9
|
def compute_algebraic_expression(expression: str) -> float:
|
10
|
+
"""
|
11
|
+
Computes the numerical result of a given algebraic expression.
|
12
|
+
|
13
|
+
Evaluates an algebraic expression provided as a string and returns the computed result.
|
14
|
+
Supports various arithmetic operations, including addition, subtraction, multiplication,
|
15
|
+
division, and modulo, as well as mathematical functions from the math module.
|
16
|
+
|
17
|
+
Parameters:
|
18
|
+
expression (str): The algebraic expression to compute. This should be a string consisting
|
19
|
+
of arithmetic operations and supported math module functions.
|
20
|
+
|
21
|
+
Returns:
|
22
|
+
float: The evaluated numerical result of the expression.
|
23
|
+
|
24
|
+
Raises:
|
25
|
+
ValueError: If the expression cannot be evaluated due to syntax errors or other issues.
|
26
|
+
"""
|
8
27
|
try:
|
9
28
|
# Direct numerical evaluation
|
10
29
|
# Safely evaluate the expression using the math module
|
@@ -15,10 +34,23 @@ def compute_algebraic_expression(expression: str) -> float:
|
|
15
34
|
except Exception as e:
|
16
35
|
raise ValueError(f"Error computing expression: {e}")
|
17
36
|
|
37
|
+
|
18
38
|
def simplify_algebraic_expression(expression: str) -> str:
|
39
|
+
"""
|
40
|
+
Simplifies an algebraic expression and returns it in LaTeX format.
|
41
|
+
|
42
|
+
Takes an algebraic expression written in Python syntax and simplifies it. The result is
|
43
|
+
returned as a LaTeX formatted string, suitable for academic or professional documentation.
|
19
44
|
|
45
|
+
Parameters:
|
46
|
+
expression (str): The algebraic expression to simplify.
|
47
|
+
|
48
|
+
Returns:
|
49
|
+
str: The simplified expression represented as a LaTeX string.
|
50
|
+
"""
|
20
51
|
|
21
|
-
def recursive_parse_function_call(
|
52
|
+
def recursive_parse_function_call(
|
53
|
+
func_call: str, prefix: str, sym_vars: Dict[str, Expr]) -> Tuple[str, List[Expr]]:
|
22
54
|
# print(f"Parsing function call: {func_call}")
|
23
55
|
|
24
56
|
# Match the function name and arguments
|
@@ -30,7 +62,8 @@ def simplify_algebraic_expression(expression: str) -> str:
|
|
30
62
|
args_str = match.group(2)
|
31
63
|
|
32
64
|
# Check if it's a list for np
|
33
|
-
if prefix == 'np' and args_str.startswith(
|
65
|
+
if prefix == 'np' and args_str.startswith(
|
66
|
+
"[") and args_str.endswith("]"):
|
34
67
|
parsed_args = [ast.literal_eval(args_str.strip())]
|
35
68
|
else:
|
36
69
|
parsed_args = []
|
@@ -38,77 +71,98 @@ def simplify_algebraic_expression(expression: str) -> str:
|
|
38
71
|
for arg in raw_args:
|
39
72
|
arg = arg.strip()
|
40
73
|
if re.match(r'\w+\.\w+\(', arg):
|
41
|
-
# Recursively evaluate the argument if it's another
|
42
|
-
|
43
|
-
|
74
|
+
# Recursively evaluate the argument if it's another
|
75
|
+
# function call
|
76
|
+
arg_val = recursive_eval_func(
|
77
|
+
re.match(r'\w+\.\w+\(.*\)', arg), sym_vars)
|
78
|
+
parsed_args.append(
|
79
|
+
parse_expr(
|
80
|
+
arg_val,
|
81
|
+
local_dict=sym_vars))
|
44
82
|
else:
|
45
83
|
parsed_args.append(parse_expr(arg, local_dict=sym_vars))
|
46
84
|
|
47
85
|
# print(f"Function name: {func_name}, Parsed arguments: {parsed_args}")
|
48
86
|
return func_name, parsed_args
|
49
87
|
|
50
|
-
|
51
88
|
def recursive_eval_func(match: re.Match, sym_vars: Dict[str, Expr]) -> str:
|
52
89
|
# print("152", match)
|
53
90
|
func_call = match.group(0)
|
54
91
|
# print(f"153 Evaluating function call: {func_call}")
|
55
92
|
|
56
93
|
if func_call.startswith("np."):
|
57
|
-
func_name, args = recursive_parse_function_call(
|
94
|
+
func_name, args = recursive_parse_function_call(
|
95
|
+
func_call, 'np', sym_vars)
|
58
96
|
if func_name == 'diff':
|
59
97
|
expr = args[0]
|
60
98
|
if isinstance(expr, list):
|
61
99
|
# Calculate discrete difference
|
62
|
-
diff_result = [expr[i] - expr[i - 1]
|
100
|
+
diff_result = [expr[i] - expr[i - 1]
|
101
|
+
for i in range(1, len(expr))]
|
63
102
|
return str(diff_result)
|
64
103
|
# Perform symbolic differentiation
|
65
104
|
diff_result = diff(expr)
|
66
105
|
return str(diff_result)
|
67
106
|
|
68
107
|
if func_call.startswith("math."):
|
69
|
-
func_name, args = recursive_parse_function_call(
|
108
|
+
func_name, args = recursive_parse_function_call(
|
109
|
+
func_call, 'math', sym_vars)
|
70
110
|
if hasattr(math, func_name):
|
71
111
|
result = getattr(math, func_name)(*args)
|
72
112
|
return str(result)
|
73
113
|
|
74
114
|
if func_call.startswith("sym."):
|
75
|
-
initial_method_match = re.match(
|
115
|
+
initial_method_match = re.match(
|
116
|
+
r'(sym\.\w+\([^()]*\))(\.(\w+)\((.*?)\))*', func_call, re.DOTALL)
|
76
117
|
if initial_method_match:
|
77
118
|
base_expr_str = initial_method_match.group(1)
|
78
|
-
base_func_name, base_args = recursive_parse_function_call(
|
119
|
+
base_func_name, base_args = recursive_parse_function_call(
|
120
|
+
base_expr_str, 'sym', sym_vars)
|
79
121
|
if base_func_name == 'solve':
|
80
122
|
solutions = solve(base_args[0], base_args[1])
|
81
123
|
# print(f"Solutions found: {solutions}")
|
82
124
|
|
83
|
-
method_chain = re.findall(
|
84
|
-
|
125
|
+
method_chain = re.findall(
|
126
|
+
r'\.(\w+)\((.*?)\)', func_call, re.DOTALL)
|
127
|
+
final_solutions = [execute_chained_methods(sol, [(m, [method_args.strip(
|
128
|
+
)]) for m, method_args in method_chain], sym_vars) for sol in solutions]
|
85
129
|
|
86
|
-
return "[" + ",".join(latex(simplify(sol))
|
130
|
+
return "[" + ",".join(latex(simplify(sol))
|
131
|
+
for sol in final_solutions) + "]"
|
87
132
|
|
88
133
|
raise ValueError(f"Unknown function call: {func_call}")
|
89
134
|
|
90
|
-
def execute_chained_methods(sym_expr: Expr,
|
135
|
+
def execute_chained_methods(sym_expr: Expr,
|
136
|
+
method_chain: List[Tuple[str,
|
137
|
+
List[str]]],
|
138
|
+
sym_vars: Dict[str,
|
139
|
+
Expr]) -> Expr:
|
91
140
|
for method_name, method_args in method_chain:
|
92
141
|
# print(f"Executing method: {method_name} with arguments: {method_args}")
|
93
142
|
method = getattr(sym_expr, method_name, None)
|
94
143
|
if method:
|
95
144
|
if method_name == 'subs' and isinstance(method_args[0], dict):
|
96
145
|
kwargs = method_args[0]
|
97
|
-
kwargs = {
|
146
|
+
kwargs = {
|
147
|
+
parse_expr(
|
148
|
+
k,
|
149
|
+
local_dict=sym_vars): parse_expr(
|
150
|
+
v,
|
151
|
+
local_dict=sym_vars) for k,
|
152
|
+
v in kwargs.items()}
|
98
153
|
sym_expr = method(kwargs)
|
99
154
|
else:
|
100
|
-
args = [parse_expr(arg.strip(), local_dict=sym_vars)
|
155
|
+
args = [parse_expr(arg.strip(), local_dict=sym_vars)
|
156
|
+
for arg in method_args]
|
101
157
|
sym_expr = method(*args)
|
102
158
|
# print(f"Result after {method_name}: {sym_expr}")
|
103
159
|
return sym_expr
|
104
160
|
|
105
|
-
|
106
|
-
|
107
161
|
variable_names = set(re.findall(r'\b[a-zA-Z]\w*\b', expression))
|
108
162
|
sym_vars = {var: symbols(var) for var in variable_names}
|
109
163
|
|
110
164
|
patterns = {
|
111
|
-
#"numpy_diff_brackets": r"np\.diff\(\[.*?\]\)",
|
165
|
+
# "numpy_diff_brackets": r"np\.diff\(\[.*?\]\)",
|
112
166
|
"numpy_diff_no_brackets": r"np\.diff\([^()]*\)",
|
113
167
|
"math_functions": r"math\.\w+\((?:[^()]*(?:\([^()]*\)[^()]*)*)\)",
|
114
168
|
# "sympy_functions": r"sym\.\w+\([^()]*\)(?:\.\w+\([^()]*\))?",
|
@@ -117,11 +171,14 @@ def simplify_algebraic_expression(expression: str) -> str:
|
|
117
171
|
function_pattern = '|'.join(patterns.values())
|
118
172
|
|
119
173
|
# Use a lambda function to pass additional arguments
|
120
|
-
processed_expression = re.sub(
|
174
|
+
processed_expression = re.sub(
|
175
|
+
function_pattern, lambda match: recursive_eval_func(
|
176
|
+
match, sym_vars), expression)
|
121
177
|
# print("Level 2 processed_expression:", processed_expression)
|
122
178
|
|
123
179
|
try:
|
124
|
-
if processed_expression.startswith(
|
180
|
+
if processed_expression.startswith(
|
181
|
+
'[') and processed_expression.endswith(']'):
|
125
182
|
return processed_expression
|
126
183
|
|
127
184
|
expr = parse_expr(processed_expression, local_dict=sym_vars)
|
@@ -136,7 +193,28 @@ def simplify_algebraic_expression(expression: str) -> str:
|
|
136
193
|
except Exception as e:
|
137
194
|
raise ValueError(f"Error simplifying expression: {e}")
|
138
195
|
|
139
|
-
|
196
|
+
|
197
|
+
def solve_algebraic_expression(
|
198
|
+
expression: str, variable: str, subs: Optional[Dict[str, float]] = None) -> str:
|
199
|
+
"""
|
200
|
+
Solves an algebraic equation for a specified variable and returns solutions in LaTeX format.
|
201
|
+
|
202
|
+
Solves the given equation for a designated variable. May optionally include substitutions
|
203
|
+
for other variables in the equation. The solutions are provided as a LaTeX formatted string.
|
204
|
+
|
205
|
+
Parameters:
|
206
|
+
expression (str): The algebraic equation to solve.
|
207
|
+
variable (str): The variable to solve the equation for.
|
208
|
+
subs (Optional[Dict[str, float]]): An optional dictionary of substitutions for variables
|
209
|
+
in the equation.
|
210
|
+
|
211
|
+
Returns:
|
212
|
+
str: The solutions of the equation, formatted as a LaTeX string.
|
213
|
+
|
214
|
+
Raises:
|
215
|
+
ValueError: If the equation cannot be solved due to errors in expression or parameters.
|
216
|
+
"""
|
217
|
+
|
140
218
|
try:
|
141
219
|
# Create symbols for the variables in the expression
|
142
220
|
variable_symbols = set(re.findall(r'\b[a-zA-Z]\w*\b', expression))
|
@@ -153,7 +231,9 @@ def solve_algebraic_expression(expression: str, variable: str, subs: Optional[Di
|
|
153
231
|
solutions = [simplify(sol.subs(subs_symbols)) for sol in solutions]
|
154
232
|
|
155
233
|
# Convert solutions to LaTeX strings if possible
|
156
|
-
latex_solutions = [
|
234
|
+
latex_solutions = [
|
235
|
+
latex(
|
236
|
+
simplify(sol)) if sol.free_symbols else str(sol) for sol in solutions]
|
157
237
|
result = r"\left[" + ", ".join(latex_solutions) + r"\right]"
|
158
238
|
print("158", result)
|
159
239
|
return result
|
@@ -162,11 +242,20 @@ def solve_algebraic_expression(expression: str, variable: str, subs: Optional[Di
|
|
162
242
|
raise ValueError(f"Error solving the expression: {e}")
|
163
243
|
|
164
244
|
|
165
|
-
|
166
245
|
def get_prime_factors_latex(n: int) -> str:
|
167
246
|
"""
|
168
|
-
|
247
|
+
Computes the prime factors of a number and returns the factorization as a LaTeX string.
|
248
|
+
|
249
|
+
Determines the prime factorization of the given integer. The result is formatted as a LaTeX
|
250
|
+
string, enabling easy integration into documents or presentations that require mathematical notation.
|
251
|
+
|
252
|
+
Parameters:
|
253
|
+
n (int): The number for which to compute prime factors.
|
254
|
+
|
255
|
+
Returns:
|
256
|
+
str: The LaTeX representation of the prime factorization.
|
169
257
|
"""
|
258
|
+
|
170
259
|
factors = []
|
171
260
|
while n % 2 == 0:
|
172
261
|
factors.append(2)
|
@@ -179,8 +268,6 @@ def get_prime_factors_latex(n: int) -> str:
|
|
179
268
|
factors.append(n)
|
180
269
|
|
181
270
|
factor_counts = {factor: factors.count(factor) for factor in set(factors)}
|
182
|
-
latex_factors = [f"{factor}^{{{count}}}" if count > 1 else str(
|
271
|
+
latex_factors = [f"{factor}^{{{count}}}" if count > 1 else str(
|
272
|
+
factor) for factor, count in factor_counts.items()]
|
183
273
|
return " \\cdot ".join(latex_factors)
|
184
|
-
|
185
|
-
|
186
|
-
|
rgwfuncs/df_lib.py
CHANGED
@@ -21,51 +21,14 @@ from email.mime.base import MIMEBase
|
|
21
21
|
from email import encoders
|
22
22
|
from googleapiclient.discovery import build
|
23
23
|
import base64
|
24
|
-
import inspect
|
25
|
-
from typing import Optional,
|
24
|
+
# import inspect
|
25
|
+
from typing import Optional, Dict, List, Tuple, Any
|
26
26
|
import warnings
|
27
27
|
|
28
28
|
# Suppress all FutureWarnings
|
29
29
|
warnings.filterwarnings("ignore", category=FutureWarning)
|
30
30
|
|
31
31
|
|
32
|
-
def df_docs(method_type_filter: Optional[str] = None) -> None:
|
33
|
-
"""
|
34
|
-
Print a list of function names in alphabetical order. If method_type_filter
|
35
|
-
is specified, print the docstrings of the functions that match the filter.
|
36
|
-
Using '*' as a filter will print the docstrings for all functions.
|
37
|
-
|
38
|
-
Parameters:
|
39
|
-
method_type_filter: Optional filter string representing a function name,
|
40
|
-
or '*' to display docstrings for all functions.
|
41
|
-
"""
|
42
|
-
# Get the current module's namespace
|
43
|
-
current_module = __name__
|
44
|
-
|
45
|
-
local_functions: Dict[str, Callable] = {
|
46
|
-
name: obj for name, obj in globals().items()
|
47
|
-
if inspect.isfunction(obj) and obj.__module__ == current_module
|
48
|
-
}
|
49
|
-
|
50
|
-
# List of function names sorted alphabetically
|
51
|
-
function_names = sorted(local_functions.keys())
|
52
|
-
|
53
|
-
# Print function names
|
54
|
-
print("Functions in alphabetical order:")
|
55
|
-
for name in function_names:
|
56
|
-
print(name)
|
57
|
-
|
58
|
-
# If a filter is provided or '*', print the docstrings of functions
|
59
|
-
if method_type_filter:
|
60
|
-
# print("\nFiltered function documentation:")
|
61
|
-
for name, func in local_functions.items():
|
62
|
-
docstring: Optional[str] = func.__doc__
|
63
|
-
if docstring:
|
64
|
-
if method_type_filter == '*' or method_type_filter == name:
|
65
|
-
# Print the entire docstring for the matching function
|
66
|
-
print(f"\n{name}:\n{docstring}")
|
67
|
-
|
68
|
-
|
69
32
|
def numeric_clean(
|
70
33
|
df: pd.DataFrame,
|
71
34
|
column_names: str,
|
@@ -835,7 +798,12 @@ def print_dataframe(df: pd.DataFrame, source: Optional[str] = None) -> None:
|
|
835
798
|
gc.collect()
|
836
799
|
|
837
800
|
|
838
|
-
def send_dataframe_via_telegram(
|
801
|
+
def send_dataframe_via_telegram(
|
802
|
+
df: pd.DataFrame,
|
803
|
+
bot_name: str,
|
804
|
+
message: Optional[str] = None,
|
805
|
+
as_file: bool = True,
|
806
|
+
remove_after_send: bool = True) -> None:
|
839
807
|
"""
|
840
808
|
Send a DataFrame via Telegram using a specified bot configuration.
|
841
809
|
|
@@ -1672,7 +1640,12 @@ def print_n_frequency_cascading(
|
|
1672
1640
|
report = generate_cascade_report(df, columns, n, order_by)
|
1673
1641
|
print(json.dumps(report, indent=2))
|
1674
1642
|
|
1675
|
-
|
1643
|
+
|
1644
|
+
def print_n_frequency_linear(
|
1645
|
+
df: pd.DataFrame,
|
1646
|
+
n: int,
|
1647
|
+
columns: list,
|
1648
|
+
order_by: str = "FREQ_DESC") -> None:
|
1676
1649
|
"""
|
1677
1650
|
Print the linear frequency of top n values for specified columns.
|
1678
1651
|
|
@@ -1719,23 +1692,36 @@ def print_n_frequency_linear(df: pd.DataFrame, n: int, columns: list, order_by:
|
|
1719
1692
|
return val
|
1720
1693
|
|
1721
1694
|
def sort_frequency(frequency, order_by):
|
1722
|
-
keys = frequency.keys()
|
1695
|
+
# keys = frequency.keys()
|
1723
1696
|
|
1724
|
-
# Convert keys to numerical values where possible, leaving `NaN` as a
|
1725
|
-
|
1697
|
+
# Convert keys to numerical values where possible, leaving `NaN` as a
|
1698
|
+
# special string
|
1699
|
+
# parsed_keys = [(try_parse_numeric(key), key) for key in keys]
|
1726
1700
|
|
1727
1701
|
if order_by in {"BY_KEYS_ASC", "BY_KEYS_DESC"}:
|
1728
1702
|
reverse = order_by == "BY_KEYS_DESC"
|
1729
|
-
sorted_items = sorted(
|
1703
|
+
sorted_items = sorted(
|
1704
|
+
frequency.items(),
|
1705
|
+
key=lambda item: try_parse_numeric(
|
1706
|
+
item[0]),
|
1707
|
+
reverse=reverse)
|
1730
1708
|
else:
|
1731
1709
|
if order_by == "ASC":
|
1732
|
-
sorted_items = sorted(
|
1710
|
+
sorted_items = sorted(
|
1711
|
+
frequency.items(), key=lambda item: item[0])
|
1733
1712
|
elif order_by == "DESC":
|
1734
|
-
sorted_items = sorted(
|
1713
|
+
sorted_items = sorted(
|
1714
|
+
frequency.items(),
|
1715
|
+
key=lambda item: item[0],
|
1716
|
+
reverse=True)
|
1735
1717
|
elif order_by == "FREQ_ASC":
|
1736
|
-
sorted_items = sorted(
|
1718
|
+
sorted_items = sorted(
|
1719
|
+
frequency.items(), key=lambda item: item[1])
|
1737
1720
|
else: # Default to "FREQ_DESC"
|
1738
|
-
sorted_items = sorted(
|
1721
|
+
sorted_items = sorted(
|
1722
|
+
frequency.items(),
|
1723
|
+
key=lambda item: item[1],
|
1724
|
+
reverse=True)
|
1739
1725
|
|
1740
1726
|
return dict(sorted_items)
|
1741
1727
|
|
@@ -1887,7 +1873,10 @@ def right_join(
|
|
1887
1873
|
return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)
|
1888
1874
|
|
1889
1875
|
|
1890
|
-
def insert_dataframe_in_sqlite_database(
|
1876
|
+
def insert_dataframe_in_sqlite_database(
|
1877
|
+
db_path: str,
|
1878
|
+
tablename: str,
|
1879
|
+
df: pd.DataFrame) -> None:
|
1891
1880
|
"""
|
1892
1881
|
Inserts a Pandas DataFrame into a SQLite database table.
|
1893
1882
|
|
@@ -1949,7 +1938,10 @@ def insert_dataframe_in_sqlite_database(db_path: str, tablename: str, df: pd.Dat
|
|
1949
1938
|
df.to_sql(tablename, conn, if_exists='append', index=False)
|
1950
1939
|
|
1951
1940
|
|
1952
|
-
def sync_dataframe_to_sqlite_database(
|
1941
|
+
def sync_dataframe_to_sqlite_database(
|
1942
|
+
db_path: str,
|
1943
|
+
tablename: str,
|
1944
|
+
df: pd.DataFrame) -> None:
|
1953
1945
|
"""
|
1954
1946
|
Processes and saves a DataFrame to an SQLite database, adding a timestamp column
|
1955
1947
|
and replacing the existing table if needed. Creates the table if it does not exist.
|
rgwfuncs/docs_lib.py
CHANGED
@@ -1,26 +1,27 @@
|
|
1
1
|
import os
|
2
2
|
import inspect
|
3
|
-
from typing import
|
3
|
+
from typing import Optional
|
4
4
|
import warnings
|
5
5
|
|
6
6
|
# Suppress all FutureWarnings
|
7
7
|
warnings.filterwarnings("ignore", category=FutureWarning)
|
8
8
|
|
9
|
+
|
9
10
|
def docs(method_type_filter: Optional[str] = None) -> None:
|
10
11
|
"""
|
11
12
|
Print a list of function names in alphabetical order from all modules.
|
12
13
|
If method_type_filter is specified, print the docstrings of the functions
|
13
|
-
that match the filter. Using '*' as a filter will print
|
14
|
-
for all functions.
|
14
|
+
that match the filter based on a substring. Using '*' as a filter will print
|
15
|
+
the docstrings for all functions.
|
15
16
|
|
16
17
|
Parameters:
|
17
|
-
method_type_filter: Optional filter string representing a
|
18
|
-
or '*' to display docstrings for all functions.
|
18
|
+
method_type_filter: Optional filter string representing a filter for
|
19
|
+
function names, or '*' to display docstrings for all functions.
|
19
20
|
"""
|
20
21
|
|
21
22
|
# Directory containing your modules
|
22
23
|
module_dir = os.path.dirname(__file__)
|
23
|
-
|
24
|
+
|
24
25
|
# Iterate over each file in the module directory
|
25
26
|
for filename in sorted(os.listdir(module_dir)):
|
26
27
|
if filename.endswith('.py') and filename != '__init__.py':
|
@@ -41,10 +42,10 @@ def docs(method_type_filter: Optional[str] = None) -> None:
|
|
41
42
|
# List function names
|
42
43
|
function_names = sorted(functions.keys())
|
43
44
|
for name in function_names:
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
# If a filter is provided or '*', check if the function name
|
46
|
+
# contains the filter
|
47
|
+
if method_type_filter and (
|
48
|
+
method_type_filter == '*' or method_type_filter in name):
|
48
49
|
docstring: Optional[str] = functions[name].__doc__
|
49
50
|
if docstring:
|
50
51
|
print(f"\n{name}:\n{docstring}")
|
rgwfuncs/str_lib.py
CHANGED
@@ -1,53 +1,16 @@
|
|
1
1
|
import os
|
2
2
|
import json
|
3
3
|
import requests
|
4
|
-
import
|
5
|
-
from typing import Tuple, Optional, Dict, Callable
|
4
|
+
from typing import Tuple
|
6
5
|
import warnings
|
7
6
|
|
8
7
|
# Suppress all FutureWarnings
|
9
8
|
warnings.filterwarnings("ignore", category=FutureWarning)
|
10
9
|
|
11
10
|
|
12
|
-
def str_docs(method_type_filter: Optional[str] = None) -> None:
|
13
|
-
"""
|
14
|
-
Print a list of function names in alphabetical order. If method_type_filter
|
15
|
-
is specified, print the docstrings of the functions that match the filter.
|
16
|
-
Using '*' as a filter will print the docstrings for all functions.
|
17
|
-
|
18
|
-
Parameters:
|
19
|
-
method_type_filter: Optional filter string representing a function name,
|
20
|
-
or '*' to display docstrings for all functions.
|
21
|
-
"""
|
22
|
-
# Get the current module's namespace
|
23
|
-
current_module = __name__
|
24
|
-
|
25
|
-
local_functions: Dict[str, Callable] = {
|
26
|
-
name: obj for name, obj in globals().items()
|
27
|
-
if inspect.isfunction(obj) and obj.__module__ == current_module
|
28
|
-
}
|
29
|
-
|
30
|
-
# List of function names sorted alphabetically
|
31
|
-
function_names = sorted(local_functions.keys())
|
32
|
-
|
33
|
-
# Print function names
|
34
|
-
print("Functions in alphabetical order:")
|
35
|
-
for name in function_names:
|
36
|
-
print(name)
|
37
|
-
|
38
|
-
# If a filter is provided or '*', print the docstrings of functions
|
39
|
-
if method_type_filter:
|
40
|
-
# print("\nFiltered function documentation:")
|
41
|
-
for name, func in local_functions.items():
|
42
|
-
docstring: Optional[str] = func.__doc__
|
43
|
-
if docstring:
|
44
|
-
if method_type_filter == '*' or method_type_filter == name:
|
45
|
-
# Print the entire docstring for the matching function
|
46
|
-
print(f"\n{name}:\n{docstring}")
|
47
|
-
|
48
|
-
|
49
11
|
def send_telegram_message(preset_name: str, message: str) -> None:
|
50
|
-
"""
|
12
|
+
"""
|
13
|
+
Send a Telegram message using the specified preset.
|
51
14
|
|
52
15
|
Args:
|
53
16
|
preset_name (str): The name of the preset to use for sending the message.
|
@@ -73,19 +36,20 @@ def send_telegram_message(preset_name: str, message: str) -> None:
|
|
73
36
|
return preset
|
74
37
|
return None
|
75
38
|
|
76
|
-
def get_telegram_bot_details(
|
39
|
+
def get_telegram_bot_details(
|
40
|
+
config: dict, preset_name: str) -> Tuple[str, str]:
|
77
41
|
"""Retrieve the Telegram bot token and chat ID from the preset."""
|
78
42
|
preset = get_telegram_preset(config, preset_name)
|
79
43
|
if not preset:
|
80
|
-
raise RuntimeError(
|
44
|
+
raise RuntimeError(
|
45
|
+
f"Telegram bot preset '{preset_name}' not found in the configuration file")
|
81
46
|
|
82
47
|
bot_token = preset.get("bot_token")
|
83
48
|
chat_id = preset.get("chat_id")
|
84
49
|
|
85
50
|
if not bot_token or not chat_id:
|
86
51
|
raise RuntimeError(
|
87
|
-
f"Telegram bot token or chat ID for '{preset_name}' not found in the configuration file"
|
88
|
-
)
|
52
|
+
f"Telegram bot token or chat ID for '{preset_name}' not found in the configuration file")
|
89
53
|
|
90
54
|
return bot_token, chat_id
|
91
55
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.26
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -135,22 +135,126 @@ To display all docstrings, use:
|
|
135
135
|
|
136
136
|
--------------------------------------------------------------------------------
|
137
137
|
|
138
|
-
##
|
138
|
+
## Documentation Access Functions
|
139
139
|
|
140
|
-
### 1.
|
141
|
-
Print a list of available function names in alphabetical order. If a filter is provided, print the
|
140
|
+
### 1. docs
|
141
|
+
Print a list of available function names in alphabetical order. If a filter is provided, print the docstrings of the functions whose names contain the filter term.
|
142
142
|
|
143
143
|
• Parameters:
|
144
144
|
- `method_type_filter` (str): Optional substring matched against function names to select which docstrings to print, or '*' for all.
|
145
145
|
|
146
146
|
• Example:
|
147
147
|
|
148
|
-
import
|
149
|
-
|
148
|
+
from rgwfuncs import docs
|
149
|
+
docs(method_type_filter='numeric_clean,limit_dataframe')
|
150
|
+
|
151
|
+
--------------------------------------------------------------------------------
|
152
|
+
|
153
|
+
## Algebra Based Functions
|
154
|
+
|
155
|
+
This section provides functions for handling algebraic expressions: numerical computation, simplification, equation solving, and prime factorization. Apart from `compute_algebraic_expression`, which returns a number, results are returned as LaTeX-formatted strings.
|
156
|
+
|
157
|
+
### 1. `compute_algebraic_expression`
|
158
|
+
|
159
|
+
Evaluates complex algebraic expressions and provides numerical results.
|
160
|
+
|
161
|
+
- **Parameters:**
|
162
|
+
- `expression` (str): A string representing an arithmetic operation.
|
163
|
+
|
164
|
+
- **Returns:**
|
165
|
+
- `float`: The computed numerical result.
|
166
|
+
|
167
|
+
- **Example:**
|
168
|
+
|
169
|
+
from rgwfuncs import compute_algebraic_expression
|
170
|
+
result1 = compute_algebraic_expression("2 + 2")
|
171
|
+
print(result1) # Output: 4.0
|
172
|
+
|
173
|
+
result2 = compute_algebraic_expression("10 % 3")
|
174
|
+
print(result2) # Output: 1.0
|
175
|
+
|
176
|
+
result3 = compute_algebraic_expression("math.gcd(36, 60) * math.sin(math.radians(45)) * 10000")
|
177
|
+
print(result3) # Output: 84852.8137423857
|
178
|
+
|
179
|
+
These examples illustrate the ability to handle basic arithmetic, the modulo operator, and functions utilizing the Python math module.
|
180
|
+
|
181
|
+
--------------------------------------------------------------------------------
|
182
|
+
|
183
|
+
### 2. `simplify_algebraic_expression`
|
184
|
+
|
185
|
+
Simplifies expressions and returns them in LaTeX format.
|
186
|
+
|
187
|
+
- **Parameters:**
|
188
|
+
- `expression` (str): A string of the expression to simplify.
|
189
|
+
|
190
|
+
- **Returns:**
|
191
|
+
- `str`: Simplified expression in LaTeX.
|
192
|
+
|
193
|
+
- **Example:**
|
194
|
+
|
195
|
+
from rgwfuncs import simplify_algebraic_expression
|
196
|
+
simplified_expr1 = simplify_algebraic_expression("2*x + 3*x")
|
197
|
+
print(simplified_expr1) # Output: "5 x"
|
198
|
+
|
199
|
+
simplified_expr2 = simplify_algebraic_expression("(np.diff(3*x**8)) / (np.diff(8*x**30) * 11*y**3)")
|
200
|
+
print(simplified_expr2) # Output: "\frac{1}{110 x^{22} y^{3}}"
|
201
|
+
|
202
|
+
These examples demonstrate simplification of polynomial expressions and more complex ratios involving derivatives.
|
203
|
+
|
204
|
+
--------------------------------------------------------------------------------
|
205
|
+
|
206
|
+
### 3. `solve_algebraic_expression`
|
207
|
+
|
208
|
+
Solves equations for specified variables, with optional substitutions, returning LaTeX-formatted solutions.
|
209
|
+
|
210
|
+
- **Parameters:**
|
211
|
+
- `expression` (str): A string of the equation to solve.
|
212
|
+
- `variable` (str): The variable to solve for.
|
213
|
+
- `subs` (Optional[Dict[str, float]]): Substitutions for variables.
|
214
|
+
|
215
|
+
- **Returns:**
|
216
|
+
- `str`: Solutions formatted in LaTeX.
|
217
|
+
|
218
|
+
- **Example:**
|
219
|
+
|
220
|
+
from rgwfuncs import solve_algebraic_expression
|
221
|
+
solutions1 = solve_algebraic_expression("a*x**2 + b*x + c", "x", {"a": 3, "b": 7, "c": 5})
|
222
|
+
print(solutions1) # Output: "\left[-7/6 - sqrt(11)*I/6, -7/6 + sqrt(11)*I/6\right]"
|
223
|
+
|
224
|
+
solutions2 = solve_algebraic_expression("x**2 - 4", "x")
|
225
|
+
print(solutions2) # Output: "\left[-2, 2\right]"
|
226
|
+
|
227
|
+
Here, we solve both a quadratic equation with complex solutions and a simpler polynomial equation.
|
150
228
|
|
151
229
|
--------------------------------------------------------------------------------
|
152
230
|
|
153
|
-
###
|
231
|
+
### 4. `get_prime_factors_latex`
|
232
|
+
|
233
|
+
Computes prime factors of a number and presents them in LaTeX format.
|
234
|
+
|
235
|
+
- **Parameters:**
|
236
|
+
- `n` (int): The integer to factorize.
|
237
|
+
|
238
|
+
- **Returns:**
|
239
|
+
- `str`: Prime factorization in LaTeX.
|
240
|
+
|
241
|
+
- **Example:**
|
242
|
+
|
243
|
+
from rgwfuncs import get_prime_factors_latex
|
244
|
+
factors1 = get_prime_factors_latex(100)
|
245
|
+
print(factors1) # Output: "2^{2} \cdot 5^{2}"
|
246
|
+
|
247
|
+
factors2 = get_prime_factors_latex(60)
|
248
|
+
print(factors2) # Output: "2^{2} \cdot 3 \cdot 5"
|
249
|
+
|
250
|
+
factors3 = get_prime_factors_latex(17)
|
251
|
+
print(factors3) # Output: "17"
|
252
|
+
|
253
|
+
--------------------------------------------------------------------------------
|
254
|
+
|
255
|
+
## String Based Functions
|
256
|
+
|
257
|
+
### 1. send_telegram_message
|
154
258
|
|
155
259
|
Send a message to a Telegram chat using a specified preset from your configuration file.
|
156
260
|
|
@@ -176,20 +280,7 @@ Send a message to a Telegram chat using a specified preset from your configurati
|
|
176
280
|
|
177
281
|
Below is a quick reference of available functions, their purpose, and basic usage examples.
|
178
282
|
|
179
|
-
### 1.
|
180
|
-
Print a list of available function names in alphabetical order. If a filter is provided, print the matching docstrings.
|
181
|
-
|
182
|
-
• Parameters:
|
183
|
-
- `method_type_filter` (str): Optional, comma-separated to select docstring types, or '*' for all.
|
184
|
-
|
185
|
-
• Example:
|
186
|
-
|
187
|
-
import rgwfuncs
|
188
|
-
rgwfuncs.df_docs(method_type_filter='numeric_clean,limit_dataframe')
|
189
|
-
|
190
|
-
--------------------------------------------------------------------------------
|
191
|
-
|
192
|
-
### 2. `numeric_clean`
|
283
|
+
### 1. `numeric_clean`
|
193
284
|
Cleans the numeric columns in a DataFrame according to specified treatments.
|
194
285
|
|
195
286
|
• Parameters:
|
@@ -218,7 +309,7 @@ Cleans the numeric columns in a DataFrame according to specified treatments.
|
|
218
309
|
|
219
310
|
--------------------------------------------------------------------------------
|
220
311
|
|
221
|
-
###
|
312
|
+
### 2. `limit_dataframe`
|
222
313
|
Limit the DataFrame to a specified number of rows.
|
223
314
|
|
224
315
|
• Parameters:
|
@@ -239,7 +330,7 @@ Limit the DataFrame to a specified number of rows.
|
|
239
330
|
|
240
331
|
--------------------------------------------------------------------------------
|
241
332
|
|
242
|
-
###
|
333
|
+
### 3. `from_raw_data`
|
243
334
|
Create a DataFrame from raw data.
|
244
335
|
|
245
336
|
• Parameters:
|
@@ -265,7 +356,7 @@ Create a DataFrame from raw data.
|
|
265
356
|
|
266
357
|
--------------------------------------------------------------------------------
|
267
358
|
|
268
|
-
###
|
359
|
+
### 4. `append_rows`
|
269
360
|
Append rows to the DataFrame.
|
270
361
|
|
271
362
|
• Parameters:
|
@@ -290,7 +381,7 @@ Append rows to the DataFrame.
|
|
290
381
|
|
291
382
|
--------------------------------------------------------------------------------
|
292
383
|
|
293
|
-
###
|
384
|
+
### 5. `append_columns`
|
294
385
|
Append new columns to the DataFrame with None values.
|
295
386
|
|
296
387
|
• Parameters:
|
@@ -311,7 +402,7 @@ Append new columns to the DataFrame with None values.
|
|
311
402
|
|
312
403
|
--------------------------------------------------------------------------------
|
313
404
|
|
314
|
-
###
|
405
|
+
### 6. `update_rows`
|
315
406
|
Update specific rows in the DataFrame based on a condition.
|
316
407
|
|
317
408
|
• Parameters:
|
@@ -333,7 +424,7 @@ Update specific rows in the DataFrame based on a condition.
|
|
333
424
|
|
334
425
|
--------------------------------------------------------------------------------
|
335
426
|
|
336
|
-
###
|
427
|
+
### 7. `delete_rows`
|
337
428
|
Delete rows from the DataFrame based on a condition.
|
338
429
|
|
339
430
|
• Parameters:
|
@@ -354,7 +445,7 @@ Delete rows from the DataFrame based on a condition.
|
|
354
445
|
|
355
446
|
--------------------------------------------------------------------------------
|
356
447
|
|
357
|
-
###
|
448
|
+
### 8. `drop_duplicates`
|
358
449
|
Drop duplicate rows in the DataFrame, retaining the first occurrence.
|
359
450
|
|
360
451
|
• Parameters:
|
@@ -374,7 +465,7 @@ Drop duplicate rows in the DataFrame, retaining the first occurrence.
|
|
374
465
|
|
375
466
|
--------------------------------------------------------------------------------
|
376
467
|
|
377
|
-
###
|
468
|
+
### 9. `drop_duplicates_retain_first`
|
378
469
|
Drop duplicate rows based on specified columns, retaining the first occurrence.
|
379
470
|
|
380
471
|
• Parameters:
|
@@ -395,7 +486,7 @@ Drop duplicate rows based on specified columns, retaining the first occurrence.
|
|
395
486
|
|
396
487
|
--------------------------------------------------------------------------------
|
397
488
|
|
398
|
-
###
|
489
|
+
### 10. `drop_duplicates_retain_last`
|
399
490
|
Drop duplicate rows based on specified columns, retaining the last occurrence.
|
400
491
|
|
401
492
|
• Parameters:
|
@@ -417,7 +508,7 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
|
|
417
508
|
|
418
509
|
--------------------------------------------------------------------------------
|
419
510
|
|
420
|
-
###
|
511
|
+
### 11. `load_data_from_query`
|
421
512
|
|
422
513
|
Load data from a database query into a DataFrame based on a configuration preset.
|
423
514
|
|
@@ -444,7 +535,7 @@ Load data from a database query into a DataFrame based on a configuration preset
|
|
444
535
|
|
445
536
|
--------------------------------------------------------------------------------
|
446
537
|
|
447
|
-
###
|
538
|
+
### 12. `load_data_from_path`
|
448
539
|
Load data from a file into a DataFrame based on the file extension.
|
449
540
|
|
450
541
|
• Parameters:
|
@@ -463,7 +554,7 @@ Load data from a file into a DataFrame based on the file extension.
|
|
463
554
|
|
464
555
|
--------------------------------------------------------------------------------
|
465
556
|
|
466
|
-
###
|
557
|
+
### 13. `load_data_from_sqlite_path`
|
467
558
|
Execute a query on a SQLite database file and return the results as a DataFrame.
|
468
559
|
|
469
560
|
• Parameters:
|
@@ -483,7 +574,7 @@ Execute a query on a SQLite database file and return the results as a DataFrame.
|
|
483
574
|
|
484
575
|
--------------------------------------------------------------------------------
|
485
576
|
|
486
|
-
###
|
577
|
+
### 14. `first_n_rows`
|
487
578
|
Display the first n rows of the DataFrame (prints out in dictionary format).
|
488
579
|
|
489
580
|
• Parameters:
|
@@ -501,7 +592,7 @@ Display the first n rows of the DataFrame (prints out in dictionary format).
|
|
501
592
|
|
502
593
|
--------------------------------------------------------------------------------
|
503
594
|
|
504
|
-
###
|
595
|
+
### 15. `last_n_rows`
|
505
596
|
Display the last n rows of the DataFrame (prints out in dictionary format).
|
506
597
|
|
507
598
|
• Parameters:
|
@@ -519,7 +610,7 @@ Display the last n rows of the DataFrame (prints out in dictionary format).
|
|
519
610
|
|
520
611
|
--------------------------------------------------------------------------------
|
521
612
|
|
522
|
-
###
|
613
|
+
### 16. `top_n_unique_values`
|
523
614
|
Print the top n unique values for specified columns in the DataFrame.
|
524
615
|
|
525
616
|
• Parameters:
|
@@ -538,7 +629,7 @@ Print the top n unique values for specified columns in the DataFrame.
|
|
538
629
|
|
539
630
|
--------------------------------------------------------------------------------
|
540
631
|
|
541
|
-
###
|
632
|
+
### 17. `bottom_n_unique_values`
|
542
633
|
Print the bottom n unique values for specified columns in the DataFrame.
|
543
634
|
|
544
635
|
• Parameters:
|
@@ -557,7 +648,7 @@ Print the bottom n unique values for specified columns in the DataFrame.
|
|
557
648
|
|
558
649
|
--------------------------------------------------------------------------------
|
559
650
|
|
560
|
-
###
|
651
|
+
### 18. `print_correlation`
|
561
652
|
Print correlation for multiple pairs of columns in the DataFrame.
|
562
653
|
|
563
654
|
• Parameters:
|
@@ -582,7 +673,7 @@ Print correlation for multiple pairs of columns in the DataFrame.
|
|
582
673
|
|
583
674
|
--------------------------------------------------------------------------------
|
584
675
|
|
585
|
-
###
|
676
|
+
### 19. `print_memory_usage`
|
586
677
|
Print the memory usage of the DataFrame in megabytes.
|
587
678
|
|
588
679
|
• Parameters:
|
@@ -599,7 +690,7 @@ Print the memory usage of the DataFrame in megabytes.
|
|
599
690
|
|
600
691
|
--------------------------------------------------------------------------------
|
601
692
|
|
602
|
-
###
|
693
|
+
### 20. `filter_dataframe`
|
603
694
|
Return a new DataFrame filtered by a given query expression.
|
604
695
|
|
605
696
|
• Parameters:
|
@@ -625,7 +716,7 @@ Return a new DataFrame filtered by a given query expression.
|
|
625
716
|
|
626
717
|
--------------------------------------------------------------------------------
|
627
718
|
|
628
|
-
###
|
719
|
+
### 21. `filter_indian_mobiles`
|
629
720
|
Filter and return rows containing valid Indian mobile numbers in the specified column.
|
630
721
|
|
631
722
|
• Parameters:
|
@@ -647,7 +738,7 @@ Filter and return rows containing valid Indian mobile numbers in the specified c
|
|
647
738
|
|
648
739
|
--------------------------------------------------------------------------------
|
649
740
|
|
650
|
-
###
|
741
|
+
### 22. `print_dataframe`
|
651
742
|
Print the entire DataFrame and its column types. Optionally print a source path.
|
652
743
|
|
653
744
|
• Parameters:
|
@@ -665,7 +756,7 @@ Print the entire DataFrame and its column types. Optionally print a source path.
|
|
665
756
|
|
666
757
|
--------------------------------------------------------------------------------
|
667
758
|
|
668
|
-
###
|
759
|
+
### 23. `send_dataframe_via_telegram`
|
669
760
|
Send a DataFrame via Telegram using a specified bot configuration.
|
670
761
|
|
671
762
|
• Parameters:
|
@@ -692,7 +783,7 @@ Send a DataFrame via Telegram using a specified bot configuration.
|
|
692
783
|
|
693
784
|
--------------------------------------------------------------------------------
|
694
785
|
|
695
|
-
###
|
786
|
+
### 24. `send_data_to_email`
|
696
787
|
Send an email with an optional DataFrame attachment using the Gmail API via a specified preset.
|
697
788
|
|
698
789
|
• Parameters:
|
@@ -722,7 +813,7 @@ Send an email with an optional DataFrame attachment using the Gmail API via a sp
|
|
722
813
|
|
723
814
|
--------------------------------------------------------------------------------
|
724
815
|
|
725
|
-
###
|
816
|
+
### 25. `send_data_to_slack`
|
726
817
|
Send a DataFrame or message to Slack using a specified bot configuration.
|
727
818
|
|
728
819
|
• Parameters:
|
@@ -748,7 +839,7 @@ Send a DataFrame or message to Slack using a specified bot configuration.
|
|
748
839
|
|
749
840
|
--------------------------------------------------------------------------------
|
750
841
|
|
751
|
-
###
|
842
|
+
### 26. `order_columns`
|
752
843
|
Reorder the columns of a DataFrame based on a string input.
|
753
844
|
|
754
845
|
• Parameters:
|
@@ -770,7 +861,7 @@ Reorder the columns of a DataFrame based on a string input.
|
|
770
861
|
|
771
862
|
--------------------------------------------------------------------------------
|
772
863
|
|
773
|
-
###
|
864
|
+
### 27. `append_ranged_classification_column`
|
774
865
|
Append a ranged classification column to the DataFrame.
|
775
866
|
|
776
867
|
• Parameters:
|
@@ -794,7 +885,7 @@ Append a ranged classification column to the DataFrame.
|
|
794
885
|
|
795
886
|
--------------------------------------------------------------------------------
|
796
887
|
|
797
|
-
###
|
888
|
+
### 28. `append_percentile_classification_column`
|
798
889
|
Append a percentile classification column to the DataFrame.
|
799
890
|
|
800
891
|
• Parameters:
|
@@ -818,7 +909,7 @@ Append a percentile classification column to the DataFrame.
|
|
818
909
|
|
819
910
|
--------------------------------------------------------------------------------
|
820
911
|
|
821
|
-
###
|
912
|
+
### 29. `append_ranged_date_classification_column`
|
822
913
|
Append a ranged date classification column to the DataFrame.
|
823
914
|
|
824
915
|
• Parameters:
|
@@ -847,7 +938,7 @@ Append a ranged date classification column to the DataFrame.
|
|
847
938
|
|
848
939
|
--------------------------------------------------------------------------------
|
849
940
|
|
850
|
-
###
|
941
|
+
### 30. `rename_columns`
|
851
942
|
Rename columns in the DataFrame.
|
852
943
|
|
853
944
|
• Parameters:
|
@@ -869,7 +960,7 @@ Rename columns in the DataFrame.
|
|
869
960
|
|
870
961
|
--------------------------------------------------------------------------------
|
871
962
|
|
872
|
-
###
|
963
|
+
### 31. `cascade_sort`
|
873
964
|
Cascade sort the DataFrame by specified columns and order.
|
874
965
|
|
875
966
|
• Parameters:
|
@@ -895,7 +986,7 @@ Cascade sort the DataFrame by specified columns and order.
|
|
895
986
|
|
896
987
|
--------------------------------------------------------------------------------
|
897
988
|
|
898
|
-
###
|
989
|
+
### 32. `append_xgb_labels`
|
899
990
|
Append XGB training labels (TRAIN, VALIDATE, TEST) based on a ratio string.
|
900
991
|
|
901
992
|
• Parameters:
|
@@ -917,7 +1008,7 @@ Append XGB training labels (TRAIN, VALIDATE, TEST) based on a ratio string.
|
|
917
1008
|
|
918
1009
|
--------------------------------------------------------------------------------
|
919
1010
|
|
920
|
-
###
|
1011
|
+
### 33. `append_xgb_regression_predictions`
|
921
1012
|
Append XGB regression predictions to the DataFrame. Requires an `XGB_TYPE` column for TRAIN/TEST splits.
|
922
1013
|
|
923
1014
|
• Parameters:
|
@@ -949,7 +1040,7 @@ Append XGB regression predictions to the DataFrame. Requires an `XGB_TYPE` colum
|
|
949
1040
|
|
950
1041
|
--------------------------------------------------------------------------------
|
951
1042
|
|
952
|
-
###
|
1043
|
+
### 34. `append_xgb_logistic_regression_predictions`
|
953
1044
|
Append XGB logistic regression predictions to the DataFrame. Requires an `XGB_TYPE` column for TRAIN/TEST splits.
|
954
1045
|
|
955
1046
|
• Parameters:
|
@@ -981,7 +1072,7 @@ Append XGB logistic regression predictions to the DataFrame. Requires an `XGB_TY
|
|
981
1072
|
|
982
1073
|
--------------------------------------------------------------------------------
|
983
1074
|
|
984
|
-
###
|
1075
|
+
### 35. `print_n_frequency_cascading`
|
985
1076
|
Print the cascading frequency of top n values for specified columns.
|
986
1077
|
|
987
1078
|
• Parameters:
|
@@ -1001,7 +1092,7 @@ Print the cascading frequency of top n values for specified columns.
|
|
1001
1092
|
|
1002
1093
|
--------------------------------------------------------------------------------
|
1003
1094
|
|
1004
|
-
###
|
1095
|
+
### 36. `print_n_frequency_linear`
|
1005
1096
|
|
1006
1097
|
Prints the linear frequency of the top `n` values for specified columns.
|
1007
1098
|
|
@@ -1030,7 +1121,7 @@ This example analyzes the `City` column, printing the top 2 most frequent values
|
|
1030
1121
|
|
1031
1122
|
--------------------------------------------------------------------------------
|
1032
1123
|
|
1033
|
-
###
|
1124
|
+
### 37. `retain_columns`
|
1034
1125
|
Retain specified columns in the DataFrame and drop the others.
|
1035
1126
|
|
1036
1127
|
• Parameters:
|
@@ -1052,7 +1143,7 @@ Retain specified columns in the DataFrame and drop the others.
|
|
1052
1143
|
|
1053
1144
|
--------------------------------------------------------------------------------
|
1054
1145
|
|
1055
|
-
###
|
1146
|
+
### 38. `mask_against_dataframe`
|
1056
1147
|
Retain only rows with common column values between two DataFrames.
|
1057
1148
|
|
1058
1149
|
• Parameters:
|
@@ -1077,7 +1168,7 @@ Retain only rows with common column values between two DataFrames.
|
|
1077
1168
|
|
1078
1169
|
--------------------------------------------------------------------------------
|
1079
1170
|
|
1080
|
-
###
|
1171
|
+
### 39. `mask_against_dataframe_converse`
|
1081
1172
|
Retain only rows with uncommon column values between two DataFrames.
|
1082
1173
|
|
1083
1174
|
• Parameters:
|
@@ -1102,7 +1193,7 @@ Retain only rows with uncommon column values between two DataFrames.
|
|
1102
1193
|
|
1103
1194
|
--------------------------------------------------------------------------------
|
1104
1195
|
|
1105
|
-
###
|
1196
|
+
### 40. `union_join`
|
1106
1197
|
Perform a union join, concatenating two DataFrames and dropping duplicates.
|
1107
1198
|
|
1108
1199
|
• Parameters:
|
@@ -1125,7 +1216,7 @@ Perform a union join, concatenating two DataFrames and dropping duplicates.
|
|
1125
1216
|
|
1126
1217
|
--------------------------------------------------------------------------------
|
1127
1218
|
|
1128
|
-
###
|
1219
|
+
### 41. `bag_union_join`
|
1129
1220
|
Perform a bag union join, concatenating two DataFrames without dropping duplicates.
|
1130
1221
|
|
1131
1222
|
• Parameters:
|
@@ -1148,7 +1239,7 @@ Perform a bag union join, concatenating two DataFrames without dropping duplicat
|
|
1148
1239
|
|
1149
1240
|
--------------------------------------------------------------------------------
|
1150
1241
|
|
1151
|
-
###
|
1242
|
+
### 42. `left_join`
|
1152
1243
|
Perform a left join on two DataFrames.
|
1153
1244
|
|
1154
1245
|
• Parameters:
|
@@ -1173,7 +1264,7 @@ Perform a left join on two DataFrames.
|
|
1173
1264
|
|
1174
1265
|
--------------------------------------------------------------------------------
|
1175
1266
|
|
1176
|
-
###
|
1267
|
+
### 43. `right_join`
|
1177
1268
|
Perform a right join on two DataFrames.
|
1178
1269
|
|
1179
1270
|
• Parameters:
|
@@ -1198,7 +1289,7 @@ Perform a right join on two DataFrames.
|
|
1198
1289
|
|
1199
1290
|
--------------------------------------------------------------------------------
|
1200
1291
|
|
1201
|
-
###
|
1292
|
+
### 44. `insert_dataframe_in_sqlite_database`
|
1202
1293
|
|
1203
1294
|
Inserts a Pandas DataFrame into a SQLite database table. If the specified table does not exist, it will be created with column types automatically inferred from the DataFrame's data types.
|
1204
1295
|
|
@@ -1236,7 +1327,7 @@ Inserts a Pandas DataFrame into a SQLite database table. If the specified table
|
|
1236
1327
|
|
1237
1328
|
--------------------------------------------------------------------------------
|
1238
1329
|
|
1239
|
-
###
|
1330
|
+
### 45. `sync_dataframe_to_sqlite_database`
|
1240
1331
|
Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.
|
1241
1332
|
|
1242
1333
|
• Parameters:
|
@@ -1260,6 +1351,8 @@ Processes and saves a DataFrame to an SQLite database, adding a timestamp column
|
|
1260
1351
|
|
1261
1352
|
--------------------------------------------------------------------------------
|
1262
1353
|
|
1354
|
+
|
1355
|
+
|
1263
1356
|
## Additional Info
|
1264
1357
|
|
1265
1358
|
For more information, refer to each function’s docstring by calling:
|
@@ -0,0 +1,11 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=SZg1HPP5D_3QimoYFH8zongQ9D9XPZWp-Qi-MZglvXw,1315
|
2
|
+
rgwfuncs/algebra_lib.py,sha256=1_ZTDVdfZcnXTlOOZlI2sAyJm2gA1lyje8l3h68kjlI,10902
|
3
|
+
rgwfuncs/df_lib.py,sha256=G_H3PXNVeseX2YLjkkrmO9eXA_7r29swUZlbPBDZjXA,66612
|
4
|
+
rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
|
5
|
+
rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
|
6
|
+
rgwfuncs-0.0.26.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
7
|
+
rgwfuncs-0.0.26.dist-info/METADATA,sha256=VXXptuvGxZt1riPsQjIGkYTnyxvikcWht9sjcnLEpU4,38637
|
8
|
+
rgwfuncs-0.0.26.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
9
|
+
rgwfuncs-0.0.26.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
10
|
+
rgwfuncs-0.0.26.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
11
|
+
rgwfuncs-0.0.26.dist-info/RECORD,,
|
rgwfuncs-0.0.24.dist-info/RECORD
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=UrSka0KkoaZfLtODgbEbGvo67-L0LK1-e9waRn2a95g,1334
|
2
|
-
rgwfuncs/algebra_lib.py,sha256=aayZogB2Rp9JAo5kVHpauqX_R346eI_rIuE5QNEMlKM,7789
|
3
|
-
rgwfuncs/df_lib.py,sha256=8KMn4FucI19EFBHUoGOS7R4mo0degg6A6802sjy7BH4,67677
|
4
|
-
rgwfuncs/docs_lib.py,sha256=iZlQMNS52FuiblCI0oXJVznSuCndeG6WqZfsm-Xnd7U,1918
|
5
|
-
rgwfuncs/str_lib.py,sha256=I5B0WOGaLUGaedMG7hqiKnIqV7Jc9h1RYlgOiC_-iGY,3678
|
6
|
-
rgwfuncs-0.0.24.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
7
|
-
rgwfuncs-0.0.24.dist-info/METADATA,sha256=-Nv5cA1xWJNfDSp53_jkaTce5n93hAH7mS7bv2IFWes,35516
|
8
|
-
rgwfuncs-0.0.24.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
9
|
-
rgwfuncs-0.0.24.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
10
|
-
rgwfuncs-0.0.24.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
11
|
-
rgwfuncs-0.0.24.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|