rgwfuncs 0.0.23__py3-none-any.whl → 0.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/__init__.py CHANGED
@@ -1,5 +1,7 @@
1
1
  # This file is automatically generated
2
2
  # Dynamically importing functions from modules
3
3
 
4
- from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, df_docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
5
- from .str_lib import send_telegram_message, str_docs
4
+ from .algebra_lib import compute_algebraic_expression, get_prime_factors_latex, simplify_algebraic_expression, solve_algebraic_expression
5
+ from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
6
+ from .docs_lib import docs
7
+ from .str_lib import send_telegram_message
@@ -0,0 +1,186 @@
1
+ import re
2
+ import math
3
+ from sympy import symbols, latex, simplify, solve, diff, Expr
4
+ from sympy.parsing.sympy_parser import parse_expr
5
+ from typing import Tuple, List, Dict, Optional
6
+
7
def compute_algebraic_expression(expression: str) -> float:
    """
    Evaluate a numeric expression string and return the result as a float.

    The expression is evaluated with Python's ``eval``. The only name made
    available to it is the ``math`` module, so calls such as
    ``"math.sqrt(2) * 3"`` work alongside plain arithmetic.

    Parameters:
        expression: The arithmetic expression to evaluate.

    Returns:
        The value of the expression converted to ``float``.

    Raises:
        ValueError: If the expression cannot be evaluated or its result
            cannot be converted to a float. The underlying exception is
            attached as ``__cause__``.
    """
    # SECURITY NOTE(review): disabling ``__builtins__`` does NOT turn ``eval``
    # into a sandbox; attribute access on literals can still reach arbitrary
    # objects. Only pass trusted expressions to this function.
    namespace = {"__builtins__": None, "math": math}
    try:
        return float(eval(expression, namespace))
    except Exception as e:
        # Chain the original error so callers can still inspect the cause.
        raise ValueError(f"Error computing expression: {e}") from e
17
+
18
def simplify_algebraic_expression(expression: str) -> str:
    """
    Simplify an algebraic expression string.

    Embedded ``np.diff(...)`` and ``math.<func>(...)`` calls are evaluated
    first and replaced by their textual result; the remaining expression is
    then parsed and simplified with sympy.

    Parameters:
        expression: The expression to simplify. Every identifier found in it
            is treated as a sympy symbol.

    Returns:
        - the processed string unchanged if it is a bracketed list
          (e.g. the result of ``np.diff([1, 3, 6])``);
        - the LaTeX form of the simplified expression if it still contains
          free symbols;
        - otherwise ``str()`` of the simplified value.

    Raises:
        ValueError: If an embedded function call is malformed or unknown, or
            if the final expression cannot be parsed or simplified.
    """
    # Local import: ``ast`` is needed for list literals passed to ``np.diff``
    # and is not imported at module level.
    import ast

    def recursive_parse_function_call(func_call: str, prefix: str, sym_vars: Dict[str, Expr]) -> Tuple[str, List[Expr]]:
        """Split ``<prefix>.<name>(<args>)`` into the name and parsed arguments."""
        match = re.match(fr'{prefix}\.(\w+)\((.*)\)', func_call, re.DOTALL)
        if not match:
            raise ValueError(f"Invalid function call: {func_call}")

        func_name = match.group(1)
        args_str = match.group(2)

        # A bracketed argument to an ``np`` function is a literal list.
        if prefix == 'np' and args_str.startswith("[") and args_str.endswith("]"):
            parsed_args = [ast.literal_eval(args_str.strip())]
        else:
            parsed_args = []
            # Split on commas that are not inside ``{...}``.
            raw_args = re.split(r',(?![^{]*\})', args_str)
            for arg in raw_args:
                arg = arg.strip()
                if re.match(r'\w+\.\w+\(', arg):
                    # The argument is itself a function call: evaluate it first.
                    arg_val = recursive_eval_func(re.match(r'\w+\.\w+\(.*\)', arg), sym_vars)
                    parsed_args.append(parse_expr(arg_val, local_dict=sym_vars))
                else:
                    parsed_args.append(parse_expr(arg, local_dict=sym_vars))

        return func_name, parsed_args

    def recursive_eval_func(match: re.Match, sym_vars: Dict[str, Expr]) -> str:
        """Evaluate one matched ``np.``/``math.``/``sym.`` call and return it as text."""
        func_call = match.group(0)

        if func_call.startswith("np."):
            func_name, args = recursive_parse_function_call(func_call, 'np', sym_vars)
            if func_name == 'diff':
                expr = args[0]
                if isinstance(expr, list):
                    # Discrete difference between consecutive list elements.
                    diff_result = [expr[i] - expr[i - 1] for i in range(1, len(expr))]
                    return str(diff_result)
                # Symbolic differentiation.
                diff_result = diff(expr)
                return str(diff_result)

        if func_call.startswith("math."):
            func_name, args = recursive_parse_function_call(func_call, 'math', sym_vars)
            if hasattr(math, func_name):
                result = getattr(math, func_name)(*args)
                return str(result)

        if func_call.startswith("sym."):
            initial_method_match = re.match(r'(sym\.\w+\([^()]*\))(\.(\w+)\((.*?)\))*', func_call, re.DOTALL)
            if initial_method_match:
                base_expr_str = initial_method_match.group(1)
                base_func_name, base_args = recursive_parse_function_call(base_expr_str, 'sym', sym_vars)
                if base_func_name == 'solve':
                    solutions = solve(base_args[0], base_args[1])

                    # Apply any chained method calls to every solution.
                    method_chain = re.findall(r'\.(\w+)\((.*?)\)', func_call, re.DOTALL)
                    final_solutions = [execute_chained_methods(sol, [(m, [method_args.strip()]) for m, method_args in method_chain], sym_vars) for sol in solutions]

                    return "[" + ",".join(latex(simplify(sol)) for sol in final_solutions) + "]"

        raise ValueError(f"Unknown function call: {func_call}")

    def execute_chained_methods(sym_expr: Expr, method_chain: List[Tuple[str, List[str]]], sym_vars: Dict[str, Expr]) -> Expr:
        """Apply each ``(method_name, args)`` pair in order; unknown methods are skipped."""
        for method_name, method_args in method_chain:
            method = getattr(sym_expr, method_name, None)
            if method:
                if method_name == 'subs' and isinstance(method_args[0], dict):
                    kwargs = method_args[0]
                    kwargs = {parse_expr(k, local_dict=sym_vars): parse_expr(v, local_dict=sym_vars) for k, v in kwargs.items()}
                    sym_expr = method(kwargs)
                else:
                    args = [parse_expr(arg.strip(), local_dict=sym_vars) for arg in method_args]
                    sym_expr = method(*args)
        return sym_expr

    # Every identifier in the expression becomes a sympy symbol.
    variable_names = set(re.findall(r'\b[a-zA-Z]\w*\b', expression))
    sym_vars = {var: symbols(var) for var in variable_names}

    patterns = {
        "numpy_diff_no_brackets": r"np\.diff\([^()]*\)",
        "math_functions": r"math\.\w+\((?:[^()]*(?:\([^()]*\)[^()]*)*)\)",
    }

    function_pattern = '|'.join(patterns.values())

    # Replace each embedded function call with its evaluated textual result.
    processed_expression = re.sub(function_pattern, lambda match: recursive_eval_func(match, sym_vars), expression)

    try:
        # A bracketed list (e.g. a discrete difference) is returned as-is.
        if processed_expression.startswith('[') and processed_expression.endswith(']'):
            return processed_expression

        expr = parse_expr(processed_expression, local_dict=sym_vars)
        final_result = simplify(expr)

        if final_result.free_symbols:
            return latex(final_result)
        return str(final_result)

    except Exception as e:
        raise ValueError(f"Error simplifying expression: {e}") from e
138
+
139
def solve_algebraic_expression(expression: str, variable: str, subs: Optional[Dict[str, float]] = None) -> str:
    """
    Solve an expression for a variable and format the solutions.

    Parameters:
        expression: The expression handed to sympy's ``solve``. Every
            identifier found in it is treated as a sympy symbol.
        variable: Name of the variable to solve for.
        subs: Optional mapping of symbol names to values that are substituted
            into each solution after solving.

    Returns:
        The solutions joined by ``", "`` and wrapped in ``\\left[`` ...
        ``\\right]``. A solution that still has free symbols is rendered as
        LaTeX; a fully numeric solution is rendered with ``str()``.

    Raises:
        ValueError: If parsing, solving or substitution fails. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        # Create symbols for the identifiers in the expression.
        variable_symbols = set(re.findall(r'\b[a-zA-Z]\w*\b', expression))
        sym_vars = {var: symbols(var) for var in variable_symbols}

        # Parse the expression and solve it for the requested variable.
        expr = parse_expr(expression, local_dict=sym_vars)
        var_symbol = symbols(variable)
        solutions = solve(expr, var_symbol)

        # Apply substitutions if provided.
        if subs:
            subs_symbols = {symbols(k): v for k, v in subs.items()}
            solutions = [simplify(sol.subs(subs_symbols)) for sol in solutions]

        latex_solutions = [latex(simplify(sol)) if sol.free_symbols else str(sol) for sol in solutions]
        return r"\left[" + ", ".join(latex_solutions) + r"\right]"

    except Exception as e:
        raise ValueError(f"Error solving the expression: {e}") from e
163
+
164
+
165
+
166
def get_prime_factors_latex(n: int) -> str:
    """
    Return the prime factorization of a positive integer as a LaTeX expression.

    Factors are listed in ascending order and joined with ``\\cdot``; a prime
    that occurs more than once is written with an exponent, e.g.
    ``2^{2} \\cdot 3 \\cdot 5`` for 60.

    Parameters:
        n: The positive integer to factorize.

    Returns:
        The LaTeX factorization. For ``n == 1`` (no prime factors) the result
        is an empty string.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # 0 is divisible by every candidate, so trial division would never end.
        raise ValueError(f"n must be a positive integer, got {n}")

    factor_counts = {}
    remaining = n
    divisor = 2
    # Integer trial division; comparing squares avoids float sqrt rounding.
    while divisor * divisor <= remaining:
        while remaining % divisor == 0:
            factor_counts[divisor] = factor_counts.get(divisor, 0) + 1
            remaining //= divisor
        # After 2, only odd candidates need to be tried.
        divisor += 1 if divisor == 2 else 2

    # Whatever is left above 1 is itself a prime factor.
    if remaining > 1:
        factor_counts[remaining] = factor_counts.get(remaining, 0) + 1

    latex_factors = [
        f"{factor}^{{{count}}}" if count > 1 else str(factor)
        for factor, count in sorted(factor_counts.items())
    ]
    return " \\cdot ".join(latex_factors)
184
+
185
+
186
+
rgwfuncs/df_lib.py CHANGED
@@ -29,43 +29,6 @@ import warnings
29
29
  warnings.filterwarnings("ignore", category=FutureWarning)
30
30
 
31
31
 
32
- def df_docs(method_type_filter: Optional[str] = None) -> None:
33
- """
34
- Print a list of function names in alphabetical order. If method_type_filter
35
- is specified, print the docstrings of the functions that match the filter.
36
- Using '*' as a filter will print the docstrings for all functions.
37
-
38
- Parameters:
39
- method_type_filter: Optional filter string representing a function name,
40
- or '*' to display docstrings for all functions.
41
- """
42
- # Get the current module's namespace
43
- current_module = __name__
44
-
45
- local_functions: Dict[str, Callable] = {
46
- name: obj for name, obj in globals().items()
47
- if inspect.isfunction(obj) and obj.__module__ == current_module
48
- }
49
-
50
- # List of function names sorted alphabetically
51
- function_names = sorted(local_functions.keys())
52
-
53
- # Print function names
54
- print("Functions in alphabetical order:")
55
- for name in function_names:
56
- print(name)
57
-
58
- # If a filter is provided or '*', print the docstrings of functions
59
- if method_type_filter:
60
- # print("\nFiltered function documentation:")
61
- for name, func in local_functions.items():
62
- docstring: Optional[str] = func.__doc__
63
- if docstring:
64
- if method_type_filter == '*' or method_type_filter == name:
65
- # Print the entire docstring for the matching function
66
- print(f"\n{name}:\n{docstring}")
67
-
68
-
69
32
  def numeric_clean(
70
33
  df: pd.DataFrame,
71
34
  column_names: str,
rgwfuncs/docs_lib.py ADDED
@@ -0,0 +1,49 @@
1
+ import os
2
+ import inspect
3
+ from typing import Tuple, Optional, Dict, Callable
4
+ import warnings
5
+
6
+ # Suppress all FutureWarnings
7
+ warnings.filterwarnings("ignore", category=FutureWarning)
8
+
9
def docs(method_type_filter: Optional[str] = None) -> None:
    """
    Print the functions defined in each module of this package.

    For every ``.py`` file next to this one (except ``__init__.py``) a
    ``# <module>.py`` header is printed. Without a filter, the names of the
    functions defined in that module follow in alphabetical order. With a
    filter, the docstrings of the matching functions are printed instead.

    Parameters:
        method_type_filter: Optional comma-separated list of terms. A function
            matches if its name contains any of the terms; ``'*'`` matches
            every function. Functions without a docstring are skipped.
    """
    # Local import keeps this function self-contained.
    import importlib

    # Split the filter so 'numeric_clean,limit_dataframe' selects both names.
    filter_terms = [
        term.strip()
        for term in (method_type_filter or "").split(",")
        if term.strip()
    ]
    show_all = "*" in filter_terms

    # Directory containing the package's modules.
    module_dir = os.path.dirname(__file__)

    for filename in sorted(os.listdir(module_dir)):
        if not filename.endswith('.py') or filename == '__init__.py':
            continue

        module_name, _ = os.path.splitext(filename)
        print(f"\n# {module_name}.py")

        module_path = f"rgwfuncs.{module_name}"
        module = importlib.import_module(module_path)

        # Keep only functions defined in the module itself, not imported ones.
        functions = {
            name: obj for name, obj
            in inspect.getmembers(module, inspect.isfunction)
            if obj.__module__ == module_path
        }

        for name in sorted(functions):
            if not filter_terms:
                # No filter: list the function name only.
                print(name)
                continue
            if show_all or any(term in name for term in filter_terms):
                docstring: Optional[str] = functions[name].__doc__
                if docstring:
                    print(f"\n{name}:\n{docstring}")
49
+
rgwfuncs/str_lib.py CHANGED
@@ -9,45 +9,9 @@ import warnings
9
9
  warnings.filterwarnings("ignore", category=FutureWarning)
10
10
 
11
11
 
12
- def str_docs(method_type_filter: Optional[str] = None) -> None:
13
- """
14
- Print a list of function names in alphabetical order. If method_type_filter
15
- is specified, print the docstrings of the functions that match the filter.
16
- Using '*' as a filter will print the docstrings for all functions.
17
-
18
- Parameters:
19
- method_type_filter: Optional filter string representing a function name,
20
- or '*' to display docstrings for all functions.
21
- """
22
- # Get the current module's namespace
23
- current_module = __name__
24
-
25
- local_functions: Dict[str, Callable] = {
26
- name: obj for name, obj in globals().items()
27
- if inspect.isfunction(obj) and obj.__module__ == current_module
28
- }
29
-
30
- # List of function names sorted alphabetically
31
- function_names = sorted(local_functions.keys())
32
-
33
- # Print function names
34
- print("Functions in alphabetical order:")
35
- for name in function_names:
36
- print(name)
37
-
38
- # If a filter is provided or '*', print the docstrings of functions
39
- if method_type_filter:
40
- # print("\nFiltered function documentation:")
41
- for name, func in local_functions.items():
42
- docstring: Optional[str] = func.__doc__
43
- if docstring:
44
- if method_type_filter == '*' or method_type_filter == name:
45
- # Print the entire docstring for the matching function
46
- print(f"\n{name}:\n{docstring}")
47
-
48
-
49
12
  def send_telegram_message(preset_name: str, message: str) -> None:
50
- """Send a Telegram message using the specified preset.
13
+ """
14
+ Send a Telegram message using the specified preset.
51
15
 
52
16
  Args:
53
17
  preset_name (str): The name of the preset to use for sending the message.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.23
3
+ Version: 0.0.25
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -135,22 +135,126 @@ To display all docstrings, use:
135
135
 
136
136
  --------------------------------------------------------------------------------
137
137
 
138
- ## String Based Functions
138
+ ## Documentation Access Functions
139
139
 
140
- ### 1. str_docs
141
- Print a list of available function names in alphabetical order. If a filter is provided, print the matching docstrings.
140
+ ### 1. docs
141
+ Print the available functions, grouped by module and sorted alphabetically. If a filter is provided, print the docstrings of the functions whose names contain the filter term.
142
142
 
143
143
  • Parameters:
144
144
  - `method_type_filter` (str): Optional, comma-separated to select docstring types, or '*' for all.
145
145
 
146
146
  • Example:
147
147
 
148
- import rgwfuncs
149
- rgwfuncs.str_docs(method_type_filter='numeric_clean,limit_dataframe')
148
+ from rgwfuncs import docs
149
+ docs(method_type_filter='numeric_clean,limit_dataframe')
150
150
 
151
151
  --------------------------------------------------------------------------------
152
152
 
153
- ### 2. send_telegram_message
153
+ ## Algebra Based Functions
154
+
155
+ This section provides functions for working with algebraic expressions: numeric computation, simplification, equation solving, and prime factorization. Numeric computation returns a float; the other functions return strings, formatted as LaTeX where applicable.
156
+
157
+ ### 1. `compute_algebraic_expression`
158
+
159
+ Evaluates complex algebraic expressions and provides numerical results.
160
+
161
+ - **Parameters:**
162
+ - `expression` (str): A string representing an arithmetic operation.
163
+
164
+ - **Returns:**
165
+ - `float`: The computed numerical result.
166
+
167
+ - **Example:**
168
+
169
+ from rgwfuncs import compute_algebraic_expression
170
+ result1 = compute_algebraic_expression("2 + 2")
171
+ print(result1) # Output: 4.0
172
+
173
+ result2 = compute_algebraic_expression("10 % 3")
174
+ print(result2) # Output: 1.0
175
+
176
+ result3 = compute_algebraic_expression("math.gcd(36, 60) * math.sin(math.radians(45)) * 10000")
177
+ print(result3) # Output: 84852.8137423857
178
+
179
+ These examples illustrate the ability to handle basic arithmetic, the modulo operator, and functions utilizing the Python math module.
180
+
181
+ --------------------------------------------------------------------------------
182
+
183
+ ### 2. `simplify_algebraic_expression`
184
+
185
+ Simplifies expressions and returns them in LaTeX format.
186
+
187
+ - **Parameters:**
188
+ - `expression` (str): A string of the expression to simplify.
189
+
190
+ - **Returns:**
191
+ - `str`: Simplified expression in LaTeX.
192
+
193
+ - **Example:**
194
+
195
+ from rgwfuncs import simplify_algebraic_expression
196
+ simplified_expr1 = simplify_algebraic_expression("2*x + 3*x")
197
+ print(simplified_expr1) # Output: "5 x"
198
+
199
+ simplified_expr2 = simplify_algebraic_expression("(np.diff(3*x**8)) / (np.diff(8*x**30) * 11*y**3)")
200
+ print(simplified_expr2) # Output: "\frac{1}{110 x^{22} y^{3}}"
201
+
202
+ These examples demonstrate simplification of polynomial expressions and more complex ratios involving derivatives.
203
+
204
+ --------------------------------------------------------------------------------
205
+
206
+ ### 3. `solve_algebraic_expression`
207
+
208
+ Solves equations for specified variables, with optional substitutions, returning LaTeX-formatted solutions.
209
+
210
+ - **Parameters:**
211
+ - `expression` (str): A string of the equation to solve.
212
+ - `variable` (str): The variable to solve for.
213
+ - `subs` (Optional[Dict[str, float]]): Substitutions for variables.
214
+
215
+ - **Returns:**
216
+ - `str`: Solutions formatted in LaTeX.
217
+
218
+ - **Example:**
219
+
220
+ from rgwfuncs import solve_algebraic_expression
221
+ solutions1 = solve_algebraic_expression("a*x**2 + b*x + c", "x", {"a": 3, "b": 7, "c": 5})
222
+ print(solutions1) # Output: "\left[-7/6 - sqrt(11)*I/6, -7/6 + sqrt(11)*I/6\right]"
223
+
224
+ solutions2 = solve_algebraic_expression("x**2 - 4", "x")
225
+ print(solutions2) # Output: "\left[-2, 2\right]"
226
+
227
+ Here, we solve both a quadratic equation with complex solutions and a simpler polynomial equation.
228
+
229
+ --------------------------------------------------------------------------------
230
+
231
+ ### 4. `get_prime_factors_latex`
232
+
233
+ Computes prime factors of a number and presents them in LaTeX format.
234
+
235
+ - **Parameters:**
236
+ - `n` (int): The integer to factorize.
237
+
238
+ - **Returns:**
239
+ - `str`: Prime factorization in LaTeX.
240
+
241
+ - **Example:**
242
+
243
+ from rgwfuncs import get_prime_factors_latex
244
+ factors1 = get_prime_factors_latex(100)
245
+ print(factors1) # Output: "2^{2} \cdot 5^{2}"
246
+
247
+ factors2 = get_prime_factors_latex(60)
248
+ print(factors2) # Output: "2^{2} \cdot 3 \cdot 5"
249
+
250
+ factors3 = get_prime_factors_latex(17)
251
+ print(factors3) # Output: "17"
252
+
253
+ --------------------------------------------------------------------------------
254
+
255
+ ## String Based Functions
256
+
257
+ ### 1. send_telegram_message
154
258
 
155
259
  Send a message to a Telegram chat using a specified preset from your configuration file.
156
260
 
@@ -176,20 +280,7 @@ Send a message to a Telegram chat using a specified preset from your configurati
176
280
 
177
281
  Below is a quick reference of available functions, their purpose, and basic usage examples.
178
282
 
179
- ### 1. df_docs
180
- Print a list of available function names in alphabetical order. If a filter is provided, print the matching docstrings.
181
-
182
- • Parameters:
183
- - `method_type_filter` (str): Optional, comma-separated to select docstring types, or '*' for all.
184
-
185
- • Example:
186
-
187
- import rgwfuncs
188
- rgwfuncs.df_docs(method_type_filter='numeric_clean,limit_dataframe')
189
-
190
- --------------------------------------------------------------------------------
191
-
192
- ### 2. `numeric_clean`
283
+ ### 1. `numeric_clean`
193
284
  Cleans the numeric columns in a DataFrame according to specified treatments.
194
285
 
195
286
  • Parameters:
@@ -218,7 +309,7 @@ Cleans the numeric columns in a DataFrame according to specified treatments.
218
309
 
219
310
  --------------------------------------------------------------------------------
220
311
 
221
- ### 3. `limit_dataframe`
312
+ ### 2. `limit_dataframe`
222
313
  Limit the DataFrame to a specified number of rows.
223
314
 
224
315
  • Parameters:
@@ -239,7 +330,7 @@ Limit the DataFrame to a specified number of rows.
239
330
 
240
331
  --------------------------------------------------------------------------------
241
332
 
242
- ### 4. `from_raw_data`
333
+ ### 3. `from_raw_data`
243
334
  Create a DataFrame from raw data.
244
335
 
245
336
  • Parameters:
@@ -265,7 +356,7 @@ Create a DataFrame from raw data.
265
356
 
266
357
  --------------------------------------------------------------------------------
267
358
 
268
- ### 5. `append_rows`
359
+ ### 4. `append_rows`
269
360
  Append rows to the DataFrame.
270
361
 
271
362
  • Parameters:
@@ -290,7 +381,7 @@ Append rows to the DataFrame.
290
381
 
291
382
  --------------------------------------------------------------------------------
292
383
 
293
- ### 6. `append_columns`
384
+ ### 5. `append_columns`
294
385
  Append new columns to the DataFrame with None values.
295
386
 
296
387
  • Parameters:
@@ -311,7 +402,7 @@ Append new columns to the DataFrame with None values.
311
402
 
312
403
  --------------------------------------------------------------------------------
313
404
 
314
- ### 7. `update_rows`
405
+ ### 6. `update_rows`
315
406
  Update specific rows in the DataFrame based on a condition.
316
407
 
317
408
  • Parameters:
@@ -333,7 +424,7 @@ Update specific rows in the DataFrame based on a condition.
333
424
 
334
425
  --------------------------------------------------------------------------------
335
426
 
336
- ### 8. `delete_rows`
427
+ ### 7. `delete_rows`
337
428
  Delete rows from the DataFrame based on a condition.
338
429
 
339
430
  • Parameters:
@@ -354,7 +445,7 @@ Delete rows from the DataFrame based on a condition.
354
445
 
355
446
  --------------------------------------------------------------------------------
356
447
 
357
- ### 9. `drop_duplicates`
448
+ ### 8. `drop_duplicates`
358
449
  Drop duplicate rows in the DataFrame, retaining the first occurrence.
359
450
 
360
451
  • Parameters:
@@ -374,7 +465,7 @@ Drop duplicate rows in the DataFrame, retaining the first occurrence.
374
465
 
375
466
  --------------------------------------------------------------------------------
376
467
 
377
- ### 10. `drop_duplicates_retain_first`
468
+ ### 9. `drop_duplicates_retain_first`
378
469
  Drop duplicate rows based on specified columns, retaining the first occurrence.
379
470
 
380
471
  • Parameters:
@@ -395,7 +486,7 @@ Drop duplicate rows based on specified columns, retaining the first occurrence.
395
486
 
396
487
  --------------------------------------------------------------------------------
397
488
 
398
- ### 11. `drop_duplicates_retain_last`
489
+ ### 10. `drop_duplicates_retain_last`
399
490
  Drop duplicate rows based on specified columns, retaining the last occurrence.
400
491
 
401
492
  • Parameters:
@@ -417,7 +508,7 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
417
508
 
418
509
  --------------------------------------------------------------------------------
419
510
 
420
- ### 12. `load_data_from_query`
511
+ ### 11. `load_data_from_query`
421
512
 
422
513
  Load data from a database query into a DataFrame based on a configuration preset.
423
514
 
@@ -444,7 +535,7 @@ Load data from a database query into a DataFrame based on a configuration preset
444
535
 
445
536
  --------------------------------------------------------------------------------
446
537
 
447
- ### 13. `load_data_from_path`
538
+ ### 12. `load_data_from_path`
448
539
  Load data from a file into a DataFrame based on the file extension.
449
540
 
450
541
  • Parameters:
@@ -463,7 +554,7 @@ Load data from a file into a DataFrame based on the file extension.
463
554
 
464
555
  --------------------------------------------------------------------------------
465
556
 
466
- ### 14. `load_data_from_sqlite_path`
557
+ ### 13. `load_data_from_sqlite_path`
467
558
  Execute a query on a SQLite database file and return the results as a DataFrame.
468
559
 
469
560
  • Parameters:
@@ -483,7 +574,7 @@ Execute a query on a SQLite database file and return the results as a DataFrame.
483
574
 
484
575
  --------------------------------------------------------------------------------
485
576
 
486
- ### 15. `first_n_rows`
577
+ ### 14. `first_n_rows`
487
578
  Display the first n rows of the DataFrame (prints out in dictionary format).
488
579
 
489
580
  • Parameters:
@@ -501,7 +592,7 @@ Display the first n rows of the DataFrame (prints out in dictionary format).
501
592
 
502
593
  --------------------------------------------------------------------------------
503
594
 
504
- ### 16. `last_n_rows`
595
+ ### 15. `last_n_rows`
505
596
  Display the last n rows of the DataFrame (prints out in dictionary format).
506
597
 
507
598
  • Parameters:
@@ -519,7 +610,7 @@ Display the last n rows of the DataFrame (prints out in dictionary format).
519
610
 
520
611
  --------------------------------------------------------------------------------
521
612
 
522
- ### 17. `top_n_unique_values`
613
+ ### 16. `top_n_unique_values`
523
614
  Print the top n unique values for specified columns in the DataFrame.
524
615
 
525
616
  • Parameters:
@@ -538,7 +629,7 @@ Print the top n unique values for specified columns in the DataFrame.
538
629
 
539
630
  --------------------------------------------------------------------------------
540
631
 
541
- ### 18. `bottom_n_unique_values`
632
+ ### 17. `bottom_n_unique_values`
542
633
  Print the bottom n unique values for specified columns in the DataFrame.
543
634
 
544
635
  • Parameters:
@@ -557,7 +648,7 @@ Print the bottom n unique values for specified columns in the DataFrame.
557
648
 
558
649
  --------------------------------------------------------------------------------
559
650
 
560
- ### 19. `print_correlation`
651
+ ### 18. `print_correlation`
561
652
  Print correlation for multiple pairs of columns in the DataFrame.
562
653
 
563
654
  • Parameters:
@@ -582,7 +673,7 @@ Print correlation for multiple pairs of columns in the DataFrame.
582
673
 
583
674
  --------------------------------------------------------------------------------
584
675
 
585
- ### 20. `print_memory_usage`
676
+ ### 19. `print_memory_usage`
586
677
  Print the memory usage of the DataFrame in megabytes.
587
678
 
588
679
  • Parameters:
@@ -599,7 +690,7 @@ Print the memory usage of the DataFrame in megabytes.
599
690
 
600
691
  --------------------------------------------------------------------------------
601
692
 
602
- ### 21. `filter_dataframe`
693
+ ### 20. `filter_dataframe`
603
694
  Return a new DataFrame filtered by a given query expression.
604
695
 
605
696
  • Parameters:
@@ -625,7 +716,7 @@ Return a new DataFrame filtered by a given query expression.
625
716
 
626
717
  --------------------------------------------------------------------------------
627
718
 
628
- ### 22. `filter_indian_mobiles`
719
+ ### 21. `filter_indian_mobiles`
629
720
  Filter and return rows containing valid Indian mobile numbers in the specified column.
630
721
 
631
722
  • Parameters:
@@ -647,7 +738,7 @@ Filter and return rows containing valid Indian mobile numbers in the specified c
647
738
 
648
739
  --------------------------------------------------------------------------------
649
740
 
650
- ### 23. `print_dataframe`
741
+ ### 22. `print_dataframe`
651
742
  Print the entire DataFrame and its column types. Optionally print a source path.
652
743
 
653
744
  • Parameters:
@@ -665,7 +756,7 @@ Print the entire DataFrame and its column types. Optionally print a source path.
665
756
 
666
757
  --------------------------------------------------------------------------------
667
758
 
668
- ### 24. `send_dataframe_via_telegram`
759
+ ### 23. `send_dataframe_via_telegram`
669
760
  Send a DataFrame via Telegram using a specified bot configuration.
670
761
 
671
762
  • Parameters:
@@ -692,7 +783,7 @@ Send a DataFrame via Telegram using a specified bot configuration.
692
783
 
693
784
  --------------------------------------------------------------------------------
694
785
 
695
- ### 25. `send_data_to_email`
786
+ ### 24. `send_data_to_email`
696
787
  Send an email with an optional DataFrame attachment using the Gmail API via a specified preset.
697
788
 
698
789
  • Parameters:
@@ -722,7 +813,7 @@ Send an email with an optional DataFrame attachment using the Gmail API via a sp
722
813
 
723
814
  --------------------------------------------------------------------------------
724
815
 
725
- ### 26. `send_data_to_slack`
816
+ ### 25. `send_data_to_slack`
726
817
  Send a DataFrame or message to Slack using a specified bot configuration.
727
818
 
728
819
  • Parameters:
@@ -748,7 +839,7 @@ Send a DataFrame or message to Slack using a specified bot configuration.
748
839
 
749
840
  --------------------------------------------------------------------------------
750
841
 
751
- ### 27. `order_columns`
842
+ ### 26. `order_columns`
752
843
  Reorder the columns of a DataFrame based on a string input.
753
844
 
754
845
  • Parameters:
@@ -770,7 +861,7 @@ Reorder the columns of a DataFrame based on a string input.
770
861
 
771
862
  --------------------------------------------------------------------------------
772
863
 
773
- ### 28. `append_ranged_classification_column`
864
+ ### 27. `append_ranged_classification_column`
774
865
  Append a ranged classification column to the DataFrame.
775
866
 
776
867
  • Parameters:
@@ -794,7 +885,7 @@ Append a ranged classification column to the DataFrame.
794
885
 
795
886
  --------------------------------------------------------------------------------
796
887
 
797
- ### 29. `append_percentile_classification_column`
888
+ ### 28. `append_percentile_classification_column`
798
889
  Append a percentile classification column to the DataFrame.
799
890
 
800
891
  • Parameters:
@@ -818,7 +909,7 @@ Append a percentile classification column to the DataFrame.
818
909
 
819
910
  --------------------------------------------------------------------------------
820
911
 
821
- ### 30. `append_ranged_date_classification_column`
912
+ ### 29. `append_ranged_date_classification_column`
822
913
  Append a ranged date classification column to the DataFrame.
823
914
 
824
915
  • Parameters:
@@ -847,7 +938,7 @@ Append a ranged date classification column to the DataFrame.
847
938
 
848
939
  --------------------------------------------------------------------------------
849
940
 
850
- ### 31. `rename_columns`
941
+ ### 30. `rename_columns`
851
942
  Rename columns in the DataFrame.
852
943
 
853
944
  • Parameters:
@@ -869,7 +960,7 @@ Rename columns in the DataFrame.
869
960
 
870
961
  --------------------------------------------------------------------------------
871
962
 
872
- ### 32. `cascade_sort`
963
+ ### 31. `cascade_sort`
873
964
  Cascade sort the DataFrame by specified columns and order.
874
965
 
875
966
  • Parameters:
@@ -895,7 +986,7 @@ Cascade sort the DataFrame by specified columns and order.
895
986
 
896
987
  --------------------------------------------------------------------------------
897
988
 
898
- ### 33. `append_xgb_labels`
989
+ ### 32. `append_xgb_labels`
899
990
  Append XGB training labels (TRAIN, VALIDATE, TEST) based on a ratio string.
900
991
 
901
992
  • Parameters:
@@ -917,7 +1008,7 @@ Append XGB training labels (TRAIN, VALIDATE, TEST) based on a ratio string.
917
1008
 
918
1009
  --------------------------------------------------------------------------------
919
1010
 
920
- ### 34. `append_xgb_regression_predictions`
1011
+ ### 33. `append_xgb_regression_predictions`
921
1012
  Append XGB regression predictions to the DataFrame. Requires an `XGB_TYPE` column for TRAIN/TEST splits.
922
1013
 
923
1014
  • Parameters:
@@ -949,7 +1040,7 @@ Append XGB regression predictions to the DataFrame. Requires an `XGB_TYPE` colum
949
1040
 
950
1041
  --------------------------------------------------------------------------------
951
1042
 
952
- ### 35. `append_xgb_logistic_regression_predictions`
1043
+ ### 34. `append_xgb_logistic_regression_predictions`
953
1044
  Append XGB logistic regression predictions to the DataFrame. Requires an `XGB_TYPE` column for TRAIN/TEST splits.
954
1045
 
955
1046
  • Parameters:
@@ -981,7 +1072,7 @@ Append XGB logistic regression predictions to the DataFrame. Requires an `XGB_TY
981
1072
 
982
1073
  --------------------------------------------------------------------------------
983
1074
 
984
- ### 36. `print_n_frequency_cascading`
1075
+ ### 35. `print_n_frequency_cascading`
985
1076
  Print the cascading frequency of top n values for specified columns.
986
1077
 
987
1078
  • Parameters:
@@ -1001,27 +1092,36 @@ Print the cascading frequency of top n values for specified columns.
1001
1092
 
1002
1093
  --------------------------------------------------------------------------------
1003
1094
 
1004
- ### 37. `print_n_frequency_linear`
1005
- Print the linear frequency of top n values for specified columns.
1095
+ ### 36. `print_n_frequency_linear`
1006
1096
 
1007
- Parameters:
1008
- - df (pd.DataFrame)
1009
- - n (int)
1010
- - columns (str): Comma-separated columns.
1011
- - `order_by` (str)
1097
+ Prints the linear frequency of the top `n` values for specified columns.
1098
+
1099
+ #### Parameters:
1100
+ - **df** (`pd.DataFrame`): The DataFrame to analyze.
1101
+ - **n** (`int`): The number of top values to print for each column.
1102
+ - **columns** (`list`): A list of column names to be analyzed.
1103
+ - **order_by** (`str`): The sort order applied to the printed values. The available options are:
1104
+ - `"ASC"`: Sort keys in ascending lexicographical order.
1105
+ - `"DESC"`: Sort keys in descending lexicographical order.
1106
+ - `"FREQ_ASC"`: Sort the frequencies in ascending order (least frequent first).
1107
+ - `"FREQ_DESC"`: Sort the frequencies in descending order (most frequent first).
1108
+ - `"BY_KEYS_ASC"`: Sort keys in ascending order, numerically where possible, treating special strings such as 'NaN' as ordinary entries.
1109
+ - `"BY_KEYS_DESC"`: Sort keys in descending order, numerically where possible, treating special strings such as 'NaN' as ordinary entries.
1110
+
1111
+ #### Example:
1012
1112
 
1013
- • Example:
1014
-
1015
1113
  from rgwfuncs import print_n_frequency_linear
1016
1114
  import pandas as pd
1017
1115
 
1018
- df = pd.DataFrame({'City': ['NY','LA','NY','SF','LA','LA']})
1019
- print_n_frequency_linear(df, 2, 'City', 'FREQ_DESC')
1020
-
1116
+ df = pd.DataFrame({'City': ['NY', 'LA', 'NY', 'SF', 'LA', 'LA']})
1117
+ print_n_frequency_linear(df, 2, ['City'], 'FREQ_DESC')
1118
+
1119
+ This example analyzes the `City` column, printing the top 2 most frequent values in descending order of frequency.
1120
+
1021
1121
 
1022
1122
  --------------------------------------------------------------------------------
1023
1123
 
1024
- ### 38. `retain_columns`
1124
+ ### 37. `retain_columns`
1025
1125
  Retain specified columns in the DataFrame and drop the others.
1026
1126
 
1027
1127
  • Parameters:
@@ -1043,7 +1143,7 @@ Retain specified columns in the DataFrame and drop the others.
1043
1143
 
1044
1144
  --------------------------------------------------------------------------------
1045
1145
 
1046
- ### 39. `mask_against_dataframe`
1146
+ ### 38. `mask_against_dataframe`
1047
1147
  Retain only rows with common column values between two DataFrames.
1048
1148
 
1049
1149
  • Parameters:
@@ -1068,7 +1168,7 @@ Retain only rows with common column values between two DataFrames.
1068
1168
 
1069
1169
  --------------------------------------------------------------------------------
1070
1170
 
1071
- ### 40. `mask_against_dataframe_converse`
1171
+ ### 39. `mask_against_dataframe_converse`
1072
1172
  Retain only rows with uncommon column values between two DataFrames.
1073
1173
 
1074
1174
  • Parameters:
@@ -1093,7 +1193,7 @@ Retain only rows with uncommon column values between two DataFrames.
1093
1193
 
1094
1194
  --------------------------------------------------------------------------------
1095
1195
 
1096
- ### 41. `union_join`
1196
+ ### 40. `union_join`
1097
1197
  Perform a union join, concatenating two DataFrames and dropping duplicates.
1098
1198
 
1099
1199
  • Parameters:
@@ -1116,7 +1216,7 @@ Perform a union join, concatenating two DataFrames and dropping duplicates.
1116
1216
 
1117
1217
  --------------------------------------------------------------------------------
1118
1218
 
1119
- ### 42. `bag_union_join`
1219
+ ### 41. `bag_union_join`
1120
1220
  Perform a bag union join, concatenating two DataFrames without dropping duplicates.
1121
1221
 
1122
1222
  • Parameters:
@@ -1139,7 +1239,7 @@ Perform a bag union join, concatenating two DataFrames without dropping duplicat
1139
1239
 
1140
1240
  --------------------------------------------------------------------------------
1141
1241
 
1142
- ### 43. `left_join`
1242
+ ### 42. `left_join`
1143
1243
  Perform a left join on two DataFrames.
1144
1244
 
1145
1245
  • Parameters:
@@ -1164,7 +1264,7 @@ Perform a left join on two DataFrames.
1164
1264
 
1165
1265
  --------------------------------------------------------------------------------
1166
1266
 
1167
- ### 44. `right_join`
1267
+ ### 43. `right_join`
1168
1268
  Perform a right join on two DataFrames.
1169
1269
 
1170
1270
  • Parameters:
@@ -1189,7 +1289,7 @@ Perform a right join on two DataFrames.
1189
1289
 
1190
1290
  --------------------------------------------------------------------------------
1191
1291
 
1192
- ### 45. `insert_dataframe_in_sqlite_database`
1292
+ ### 44. `insert_dataframe_in_sqlite_database`
1193
1293
 
1194
1294
  Inserts a Pandas DataFrame into a SQLite database table. If the specified table does not exist, it will be created with column types automatically inferred from the DataFrame's data types.
1195
1295
 
@@ -1227,7 +1327,7 @@ Inserts a Pandas DataFrame into a SQLite database table. If the specified table
1227
1327
 
1228
1328
  --------------------------------------------------------------------------------
1229
1329
 
1230
- ### 46. `sync_dataframe_to_sqlite_database`
1330
+ ### 45. `sync_dataframe_to_sqlite_database`
1231
1331
  Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.
1232
1332
 
1233
1333
  • Parameters:
@@ -1251,6 +1351,8 @@ Processes and saves a DataFrame to an SQLite database, adding a timestamp column
1251
1351
 
1252
1352
  --------------------------------------------------------------------------------
1253
1353
 
1354
+
1355
+
1254
1356
  ## Additional Info
1255
1357
 
1256
1358
  For more information, refer to each function’s docstring by calling:
@@ -0,0 +1,11 @@
1
+ rgwfuncs/__init__.py,sha256=SZg1HPP5D_3QimoYFH8zongQ9D9XPZWp-Qi-MZglvXw,1315
2
+ rgwfuncs/algebra_lib.py,sha256=aayZogB2Rp9JAo5kVHpauqX_R346eI_rIuE5QNEMlKM,7789
3
+ rgwfuncs/df_lib.py,sha256=OfbnAii_RND_euTJVou9nJaDqRLNbIMTCbaBelAUDvk,66247
4
+ rgwfuncs/docs_lib.py,sha256=vlO8Rr6PYzyd2ZAenV_6t_iZJ3CoHja3PSLJverlAT4,1941
5
+ rgwfuncs/str_lib.py,sha256=PHvxAg7_mZ_xe7DWJiAQ-PQ2fkYddKe8G0iQ1L78aZ0,2252
6
+ rgwfuncs-0.0.25.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
7
+ rgwfuncs-0.0.25.dist-info/METADATA,sha256=RQIG8bS4SFwHTycxQXpaYnyw2C8UIkQZEe7EfivMwao,38637
8
+ rgwfuncs-0.0.25.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
9
+ rgwfuncs-0.0.25.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
10
+ rgwfuncs-0.0.25.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
11
+ rgwfuncs-0.0.25.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- rgwfuncs/__init__.py,sha256=2nrp3c5VmVrKh0Ih6zELL8niH9nAHN0XnObqe-EpxlE,1169
2
- rgwfuncs/df_lib.py,sha256=8KMn4FucI19EFBHUoGOS7R4mo0degg6A6802sjy7BH4,67677
3
- rgwfuncs/str_lib.py,sha256=I5B0WOGaLUGaedMG7hqiKnIqV7Jc9h1RYlgOiC_-iGY,3678
4
- rgwfuncs-0.0.23.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
5
- rgwfuncs-0.0.23.dist-info/METADATA,sha256=_mVsZMv4umMXMW_Q2hBxABMm75pKuvJgMIMBldXxCtk,34680
6
- rgwfuncs-0.0.23.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
7
- rgwfuncs-0.0.23.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
8
- rgwfuncs-0.0.23.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
9
- rgwfuncs-0.0.23.dist-info/RECORD,,