rgwfuncs 0.0.3__tar.gz → 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rgwfuncs-0.0.3/src/rgwfuncs.egg-info → rgwfuncs-0.0.5}/PKG-INFO +90 -96
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/README.md +89 -95
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/pyproject.toml +1 -1
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/setup.cfg +1 -1
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5/src/rgwfuncs.egg-info}/PKG-INFO +90 -96
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/LICENSE +0 -0
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/src/rgwfuncs/__init__.py +0 -0
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/src/rgwfuncs/df_lib.py +0 -0
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/src/rgwfuncs.egg-info/SOURCES.txt +0 -0
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/src/rgwfuncs.egg-info/dependency_links.txt +0 -0
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/src/rgwfuncs.egg-info/entry_points.txt +0 -0
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/src/rgwfuncs.egg-info/requires.txt +0 -0
- {rgwfuncs-0.0.3 → rgwfuncs-0.0.5}/src/rgwfuncs.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.3
|
3
|
+
Version: 0.0.5
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -24,12 +24,10 @@ Requires-Dist: requests
|
|
24
24
|
Requires-Dist: slack-sdk
|
25
25
|
Requires-Dist: google-api-python-client
|
26
26
|
|
27
|
-
|
27
|
+
# RGWFUNCS
|
28
28
|
|
29
29
|
***By Ryan Gerard Wilson (https://ryangerardwilson.com)***
|
30
30
|
|
31
|
-
# RGWFuncs
|
32
|
-
|
33
31
|
This library provides a variety of functions for manipulating and analyzing pandas DataFrames.
|
34
32
|
|
35
33
|
--------------------------------------------------------------------------------
|
@@ -37,33 +35,29 @@ This library provides a variety of functions for manipulating and analyzing pand
|
|
37
35
|
## Installation
|
38
36
|
|
39
37
|
Install the package using:
|
40
|
-
|
38
|
+
bash
|
41
39
|
pip install rgwfuncs
|
42
|
-
|
40
|
+
|
43
41
|
|
44
42
|
--------------------------------------------------------------------------------
|
45
43
|
|
46
44
|
## Basic Usage
|
47
45
|
|
48
46
|
Import the library:
|
49
|
-
|
47
|
+
|
50
48
|
import rgwfuncs
|
51
|
-
```
|
52
49
|
|
53
50
|
View available function docstrings in alphabetical order:
|
54
|
-
|
51
|
+
|
55
52
|
rgwfuncs.docs()
|
56
|
-
```
|
57
53
|
|
58
54
|
View specific docstrings by providing a filter (comma-separated). For example, to display docstrings about "numeric_clean":
|
59
|
-
|
55
|
+
|
60
56
|
rgwfuncs.docs(method_type_filter='numeric_clean')
|
61
|
-
```
|
62
57
|
|
63
58
|
To display all docstrings, use:
|
64
|
-
|
59
|
+
|
65
60
|
rgwfuncs.docs(method_type_filter='*')
|
66
|
-
```
|
67
61
|
|
68
62
|
--------------------------------------------------------------------------------
|
69
63
|
|
@@ -124,14 +118,14 @@ Limit the DataFrame to a specified number of rows.
|
|
124
118
|
- pd.DataFrame: A new DataFrame limited to the specified number of rows.
|
125
119
|
|
126
120
|
• Example:
|
127
|
-
|
121
|
+
|
128
122
|
from rgwfuncs import limit_dataframe
|
129
123
|
import pandas as pd
|
130
124
|
|
131
125
|
df = pd.DataFrame({'A': range(10), 'B': range(10, 20)})
|
132
126
|
df_limited = limit_dataframe(df, 5)
|
133
127
|
print(df_limited)
|
134
|
-
|
128
|
+
|
135
129
|
--------------------------------------------------------------------------------
|
136
130
|
|
137
131
|
### 4. `from_raw_data`
|
@@ -145,7 +139,7 @@ Create a DataFrame from raw data.
|
|
145
139
|
- pd.DataFrame: A DataFrame created from the raw data.
|
146
140
|
|
147
141
|
• Example:
|
148
|
-
|
142
|
+
|
149
143
|
from rgwfuncs import from_raw_data
|
150
144
|
|
151
145
|
headers = ["Name", "Age"]
|
@@ -157,7 +151,7 @@ Create a DataFrame from raw data.
|
|
157
151
|
|
158
152
|
df = from_raw_data(headers, data)
|
159
153
|
print(df)
|
160
|
-
|
154
|
+
|
161
155
|
--------------------------------------------------------------------------------
|
162
156
|
|
163
157
|
### 5. `append_rows`
|
@@ -171,7 +165,7 @@ Append rows to the DataFrame.
|
|
171
165
|
- pd.DataFrame: A new DataFrame with appended rows.
|
172
166
|
|
173
167
|
• Example:
|
174
|
-
|
168
|
+
|
175
169
|
from rgwfuncs import append_rows
|
176
170
|
import pandas as pd
|
177
171
|
|
@@ -182,7 +176,7 @@ Append rows to the DataFrame.
|
|
182
176
|
]
|
183
177
|
df_appended = append_rows(df, new_rows)
|
184
178
|
print(df_appended)
|
185
|
-
|
179
|
+
|
186
180
|
--------------------------------------------------------------------------------
|
187
181
|
|
188
182
|
### 6. `append_columns`
|
@@ -196,14 +190,14 @@ Append new columns to the DataFrame with None values.
|
|
196
190
|
- pd.DataFrame: A new DataFrame with the new columns appended.
|
197
191
|
|
198
192
|
• Example:
|
199
|
-
|
193
|
+
|
200
194
|
from rgwfuncs import append_columns
|
201
195
|
import pandas as pd
|
202
196
|
|
203
197
|
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25]})
|
204
198
|
df_new = append_columns(df, ['Salary', 'Department'])
|
205
199
|
print(df_new)
|
206
|
-
|
200
|
+
|
207
201
|
--------------------------------------------------------------------------------
|
208
202
|
|
209
203
|
### 7. `update_rows`
|
@@ -218,14 +212,14 @@ Update specific rows in the DataFrame based on a condition.
|
|
218
212
|
- pd.DataFrame: A new DataFrame with updated rows.
|
219
213
|
|
220
214
|
• Example:
|
221
|
-
|
215
|
+
|
222
216
|
from rgwfuncs import update_rows
|
223
217
|
import pandas as pd
|
224
218
|
|
225
219
|
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25]})
|
226
220
|
df_updated = update_rows(df, "Name == 'Alice'", {'Age': 31})
|
227
221
|
print(df_updated)
|
228
|
-
|
222
|
+
|
229
223
|
--------------------------------------------------------------------------------
|
230
224
|
|
231
225
|
### 8. `delete_rows`
|
@@ -239,14 +233,14 @@ Delete rows from the DataFrame based on a condition.
|
|
239
233
|
- pd.DataFrame: The DataFrame with specified rows deleted.
|
240
234
|
|
241
235
|
• Example:
|
242
|
-
|
236
|
+
|
243
237
|
from rgwfuncs import delete_rows
|
244
238
|
import pandas as pd
|
245
239
|
|
246
240
|
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25]})
|
247
241
|
df_deleted = delete_rows(df, "Age < 28")
|
248
242
|
print(df_deleted)
|
249
|
-
|
243
|
+
|
250
244
|
--------------------------------------------------------------------------------
|
251
245
|
|
252
246
|
### 9. `drop_duplicates`
|
@@ -259,14 +253,14 @@ Drop duplicate rows in the DataFrame, retaining the first occurrence.
|
|
259
253
|
- pd.DataFrame: A new DataFrame with duplicates removed.
|
260
254
|
|
261
255
|
• Example:
|
262
|
-
|
256
|
+
|
263
257
|
from rgwfuncs import drop_duplicates
|
264
258
|
import pandas as pd
|
265
259
|
|
266
260
|
df = pd.DataFrame({'A': [1,1,2,2], 'B': [3,3,4,4]})
|
267
261
|
df_no_dupes = drop_duplicates(df)
|
268
262
|
print(df_no_dupes)
|
269
|
-
|
263
|
+
|
270
264
|
--------------------------------------------------------------------------------
|
271
265
|
|
272
266
|
### 10. `drop_duplicates_retain_first`
|
@@ -280,14 +274,14 @@ Drop duplicate rows based on specified columns, retaining the first occurrence.
|
|
280
274
|
- pd.DataFrame: A new DataFrame with duplicates removed.
|
281
275
|
|
282
276
|
• Example:
|
283
|
-
|
277
|
+
|
284
278
|
from rgwfuncs import drop_duplicates_retain_first
|
285
279
|
import pandas as pd
|
286
280
|
|
287
281
|
df = pd.DataFrame({'A': [1,1,2,2], 'B': [3,3,4,4]})
|
288
282
|
df_no_dupes = drop_duplicates_retain_first(df, 'A')
|
289
283
|
print(df_no_dupes)
|
290
|
-
|
284
|
+
|
291
285
|
--------------------------------------------------------------------------------
|
292
286
|
|
293
287
|
### 11. `drop_duplicates_retain_last`
|
@@ -301,14 +295,14 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
|
|
301
295
|
- pd.DataFrame: A new DataFrame with duplicates removed.
|
302
296
|
|
303
297
|
• Example:
|
304
|
-
|
298
|
+
|
305
299
|
from rgwfuncs import drop_duplicates_retain_last
|
306
300
|
import pandas as pd
|
307
301
|
|
308
302
|
df = pd.DataFrame({'A': [1,1,2,2], 'B': [3,3,4,4]})
|
309
303
|
df_no_dupes = drop_duplicates_retain_last(df, 'A')
|
310
304
|
print(df_no_dupes)
|
311
|
-
|
305
|
+
|
312
306
|
|
313
307
|
--------------------------------------------------------------------------------
|
314
308
|
|
@@ -324,7 +318,7 @@ Load data from a database query into a DataFrame based on a configuration preset
|
|
324
318
|
- pd.DataFrame: A DataFrame containing the query result.
|
325
319
|
|
326
320
|
• Example:
|
327
|
-
|
321
|
+
|
328
322
|
from rgwfuncs import load_data_from_query
|
329
323
|
|
330
324
|
df = load_data_from_query(
|
@@ -333,7 +327,7 @@ Load data from a database query into a DataFrame based on a configuration preset
|
|
333
327
|
config_file_name="rgwml.config"
|
334
328
|
)
|
335
329
|
print(df)
|
336
|
-
|
330
|
+
|
337
331
|
|
338
332
|
--------------------------------------------------------------------------------
|
339
333
|
|
@@ -347,12 +341,12 @@ Load data from a file into a DataFrame based on the file extension.
|
|
347
341
|
- pd.DataFrame: A DataFrame containing the loaded data.
|
348
342
|
|
349
343
|
• Example:
|
350
|
-
|
344
|
+
|
351
345
|
from rgwfuncs import load_data_from_path
|
352
346
|
|
353
347
|
df = load_data_from_path("/absolute/path/to/data.csv")
|
354
348
|
print(df)
|
355
|
-
|
349
|
+
|
356
350
|
|
357
351
|
--------------------------------------------------------------------------------
|
358
352
|
|
@@ -367,12 +361,12 @@ Execute a query on a SQLite database file and return the results as a DataFrame.
|
|
367
361
|
- pd.DataFrame: A DataFrame containing the query results.
|
368
362
|
|
369
363
|
• Example:
|
370
|
-
|
364
|
+
|
371
365
|
from rgwfuncs import load_data_from_sqlite_path
|
372
366
|
|
373
367
|
df = load_data_from_sqlite_path("/path/to/database.db", "SELECT * FROM my_table")
|
374
368
|
print(df)
|
375
|
-
|
369
|
+
|
376
370
|
|
377
371
|
--------------------------------------------------------------------------------
|
378
372
|
|
@@ -384,13 +378,13 @@ Display the first n rows of the DataFrame (prints out in dictionary format).
|
|
384
378
|
- n (int): Number of rows to display.
|
385
379
|
|
386
380
|
• Example:
|
387
|
-
|
381
|
+
|
388
382
|
from rgwfuncs import first_n_rows
|
389
383
|
import pandas as pd
|
390
384
|
|
391
385
|
df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})
|
392
386
|
first_n_rows(df, 2)
|
393
|
-
|
387
|
+
|
394
388
|
|
395
389
|
--------------------------------------------------------------------------------
|
396
390
|
|
@@ -402,13 +396,13 @@ Display the last n rows of the DataFrame (prints out in dictionary format).
|
|
402
396
|
- n (int): Number of rows to display.
|
403
397
|
|
404
398
|
• Example:
|
405
|
-
|
399
|
+
|
406
400
|
from rgwfuncs import last_n_rows
|
407
401
|
import pandas as pd
|
408
402
|
|
409
403
|
df = pd.DataFrame({'A': [1,2,3,4,5], 'B': [6,7,8,9,10]})
|
410
404
|
last_n_rows(df, 2)
|
411
|
-
|
405
|
+
|
412
406
|
|
413
407
|
--------------------------------------------------------------------------------
|
414
408
|
|
@@ -421,13 +415,13 @@ Print the top n unique values for specified columns in the DataFrame.
|
|
421
415
|
- columns (list): List of columns for which to display top unique values.
|
422
416
|
|
423
417
|
• Example:
|
424
|
-
|
418
|
+
|
425
419
|
from rgwfuncs import top_n_unique_values
|
426
420
|
import pandas as pd
|
427
421
|
|
428
422
|
df = pd.DataFrame({'Cities': ['NY', 'LA', 'NY', 'SF', 'LA', 'LA']})
|
429
423
|
top_n_unique_values(df, 2, ['Cities'])
|
430
|
-
|
424
|
+
|
431
425
|
|
432
426
|
--------------------------------------------------------------------------------
|
433
427
|
|
@@ -440,13 +434,13 @@ Print the bottom n unique values for specified columns in the DataFrame.
|
|
440
434
|
- columns (list)
|
441
435
|
|
442
436
|
• Example:
|
443
|
-
|
437
|
+
|
444
438
|
from rgwfuncs import bottom_n_unique_values
|
445
439
|
import pandas as pd
|
446
440
|
|
447
441
|
df = pd.DataFrame({'Cities': ['NY', 'LA', 'NY', 'SF', 'LA', 'LA']})
|
448
442
|
bottom_n_unique_values(df, 1, ['Cities'])
|
449
|
-
|
443
|
+
|
450
444
|
|
451
445
|
--------------------------------------------------------------------------------
|
452
446
|
|
@@ -458,7 +452,7 @@ Print correlation for multiple pairs of columns in the DataFrame.
|
|
458
452
|
- `column_pairs` (list of tuples): E.g., `[('col1','col2'), ('colA','colB')]`.
|
459
453
|
|
460
454
|
• Example:
|
461
|
-
|
455
|
+
|
462
456
|
from rgwfuncs import print_correlation
|
463
457
|
import pandas as pd
|
464
458
|
|
@@ -471,7 +465,7 @@ Print correlation for multiple pairs of columns in the DataFrame.
|
|
471
465
|
|
472
466
|
pairs = [('col1','col2'), ('colA','colB')]
|
473
467
|
print_correlation(df, pairs)
|
474
|
-
|
468
|
+
|
475
469
|
|
476
470
|
--------------------------------------------------------------------------------
|
477
471
|
|
@@ -482,13 +476,13 @@ Print the memory usage of the DataFrame in megabytes.
|
|
482
476
|
- df (pd.DataFrame)
|
483
477
|
|
484
478
|
• Example:
|
485
|
-
|
479
|
+
|
486
480
|
from rgwfuncs import print_memory_usage
|
487
481
|
import pandas as pd
|
488
482
|
|
489
483
|
df = pd.DataFrame({'A': range(1000)})
|
490
484
|
print_memory_usage(df)
|
491
|
-
|
485
|
+
|
492
486
|
|
493
487
|
--------------------------------------------------------------------------------
|
494
488
|
|
@@ -503,7 +497,7 @@ Return a new DataFrame filtered by a given query expression.
|
|
503
497
|
- pd.DataFrame
|
504
498
|
|
505
499
|
• Example:
|
506
|
-
|
500
|
+
|
507
501
|
from rgwfuncs import filter_dataframe
|
508
502
|
import pandas as pd
|
509
503
|
|
@@ -514,7 +508,7 @@ Return a new DataFrame filtered by a given query expression.
|
|
514
508
|
|
515
509
|
df_filtered = filter_dataframe(df, "Age > 23")
|
516
510
|
print(df_filtered)
|
517
|
-
|
511
|
+
|
518
512
|
|
519
513
|
--------------------------------------------------------------------------------
|
520
514
|
|
@@ -529,14 +523,14 @@ Filter and return rows containing valid Indian mobile numbers in the specified c
|
|
529
523
|
- pd.DataFrame
|
530
524
|
|
531
525
|
• Example:
|
532
|
-
|
526
|
+
|
533
527
|
from rgwfuncs import filter_indian_mobiles
|
534
528
|
import pandas as pd
|
535
529
|
|
536
530
|
df = pd.DataFrame({'Phone': ['9876543210', '12345', '7000012345']})
|
537
531
|
df_indian = filter_indian_mobiles(df, 'Phone')
|
538
532
|
print(df_indian)
|
539
|
-
|
533
|
+
|
540
534
|
|
541
535
|
--------------------------------------------------------------------------------
|
542
536
|
|
@@ -548,13 +542,13 @@ Print the entire DataFrame and its column types. Optionally print a source path.
|
|
548
542
|
- source (str, optional)
|
549
543
|
|
550
544
|
• Example:
|
551
|
-
|
545
|
+
|
552
546
|
from rgwfuncs import print_dataframe
|
553
547
|
import pandas as pd
|
554
548
|
|
555
549
|
df = pd.DataFrame({'Name': ['Alice'], 'Age': [30]})
|
556
550
|
print_dataframe(df, source='SampleData.csv')
|
557
|
-
|
551
|
+
|
558
552
|
|
559
553
|
--------------------------------------------------------------------------------
|
560
554
|
|
@@ -569,7 +563,7 @@ Send a DataFrame via Telegram using a specified bot configuration.
|
|
569
563
|
- `remove_after_send` (bool)
|
570
564
|
|
571
565
|
• Example:
|
572
|
-
|
566
|
+
|
573
567
|
from rgwfuncs import send_dataframe_via_telegram
|
574
568
|
|
575
569
|
# Suppose your bot config is in "rgwml.config" under [TelegramBots] section
|
@@ -581,7 +575,7 @@ Send a DataFrame via Telegram using a specified bot configuration.
|
|
581
575
|
as_file=True,
|
582
576
|
remove_after_send=True
|
583
577
|
)
|
584
|
-
|
578
|
+
|
585
579
|
|
586
580
|
--------------------------------------------------------------------------------
|
587
581
|
|
@@ -598,7 +592,7 @@ Send an email with an optional DataFrame attachment using the Gmail API via a sp
|
|
598
592
|
- `remove_after_send` (bool)
|
599
593
|
|
600
594
|
• Example:
|
601
|
-
|
595
|
+
|
602
596
|
from rgwfuncs import send_data_to_email
|
603
597
|
|
604
598
|
df = ... # Some DataFrame
|
@@ -611,7 +605,7 @@ Send an email with an optional DataFrame attachment using the Gmail API via a sp
|
|
611
605
|
as_file=True,
|
612
606
|
remove_after_send=True
|
613
607
|
)
|
614
|
-
|
608
|
+
|
615
609
|
|
616
610
|
--------------------------------------------------------------------------------
|
617
611
|
|
@@ -626,7 +620,7 @@ Send a DataFrame or message to Slack using a specified bot configuration.
|
|
626
620
|
- `remove_after_send` (bool)
|
627
621
|
|
628
622
|
• Example:
|
629
|
-
|
623
|
+
|
630
624
|
from rgwfuncs import send_data_to_slack
|
631
625
|
|
632
626
|
df = ... # Some DataFrame
|
@@ -637,7 +631,7 @@ Send a DataFrame or message to Slack using a specified bot configuration.
|
|
637
631
|
as_file=True,
|
638
632
|
remove_after_send=True
|
639
633
|
)
|
640
|
-
|
634
|
+
|
641
635
|
|
642
636
|
--------------------------------------------------------------------------------
|
643
637
|
|
@@ -652,14 +646,14 @@ Reorder the columns of a DataFrame based on a string input.
|
|
652
646
|
- pd.DataFrame
|
653
647
|
|
654
648
|
• Example:
|
655
|
-
|
649
|
+
|
656
650
|
from rgwfuncs import order_columns
|
657
651
|
import pandas as pd
|
658
652
|
|
659
653
|
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25], 'Salary': [1000, 1200]})
|
660
654
|
df_reordered = order_columns(df, 'Salary,Name,Age')
|
661
655
|
print(df_reordered)
|
662
|
-
|
656
|
+
|
663
657
|
|
664
658
|
--------------------------------------------------------------------------------
|
665
659
|
|
@@ -676,14 +670,14 @@ Append a ranged classification column to the DataFrame.
|
|
676
670
|
- pd.DataFrame
|
677
671
|
|
678
672
|
• Example:
|
679
|
-
|
673
|
+
|
680
674
|
from rgwfuncs import append_ranged_classification_column
|
681
675
|
import pandas as pd
|
682
676
|
|
683
677
|
df = pd.DataFrame({'Scores': [5, 12, 25]})
|
684
678
|
df_classified = append_ranged_classification_column(df, '0-10,11-20,21-30', 'Scores', 'ScoreRange')
|
685
679
|
print(df_classified)
|
686
|
-
|
680
|
+
|
687
681
|
|
688
682
|
--------------------------------------------------------------------------------
|
689
683
|
|
@@ -700,14 +694,14 @@ Append a percentile classification column to the DataFrame.
|
|
700
694
|
- pd.DataFrame
|
701
695
|
|
702
696
|
• Example:
|
703
|
-
|
697
|
+
|
704
698
|
from rgwfuncs import append_percentile_classification_column
|
705
699
|
import pandas as pd
|
706
700
|
|
707
701
|
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
|
708
702
|
df_classified = append_percentile_classification_column(df, '25,50,75', 'Values', 'ValuePercentile')
|
709
703
|
print(df_classified)
|
710
|
-
|
704
|
+
|
711
705
|
|
712
706
|
--------------------------------------------------------------------------------
|
713
707
|
|
@@ -724,7 +718,7 @@ Append a ranged date classification column to the DataFrame.
|
|
724
718
|
- pd.DataFrame
|
725
719
|
|
726
720
|
• Example:
|
727
|
-
|
721
|
+
|
728
722
|
from rgwfuncs import append_ranged_date_classification_column
|
729
723
|
import pandas as pd
|
730
724
|
|
@@ -736,7 +730,7 @@ Append a ranged date classification column to the DataFrame.
|
|
736
730
|
'DateRange'
|
737
731
|
)
|
738
732
|
print(df_classified)
|
739
|
-
|
733
|
+
|
740
734
|
|
741
735
|
--------------------------------------------------------------------------------
|
742
736
|
|
@@ -751,14 +745,14 @@ Rename columns in the DataFrame.
|
|
751
745
|
- pd.DataFrame
|
752
746
|
|
753
747
|
• Example:
|
754
|
-
|
748
|
+
|
755
749
|
from rgwfuncs import rename_columns
|
756
750
|
import pandas as pd
|
757
751
|
|
758
752
|
df = pd.DataFrame({'OldName': [1,2,3]})
|
759
753
|
df_renamed = rename_columns(df, {'OldName': 'NewName'})
|
760
754
|
print(df_renamed)
|
761
|
-
|
755
|
+
|
762
756
|
|
763
757
|
--------------------------------------------------------------------------------
|
764
758
|
|
@@ -773,7 +767,7 @@ Cascade sort the DataFrame by specified columns and order.
|
|
773
767
|
- pd.DataFrame
|
774
768
|
|
775
769
|
• Example:
|
776
|
-
|
770
|
+
|
777
771
|
from rgwfuncs import cascade_sort
|
778
772
|
import pandas as pd
|
779
773
|
|
@@ -784,7 +778,7 @@ Cascade sort the DataFrame by specified columns and order.
|
|
784
778
|
|
785
779
|
sorted_df = cascade_sort(df, ["Name::ASC", "Age::DESC"])
|
786
780
|
print(sorted_df)
|
787
|
-
|
781
|
+
|
788
782
|
|
789
783
|
--------------------------------------------------------------------------------
|
790
784
|
|
@@ -799,14 +793,14 @@ Append XGB training labels (TRAIN, VALIDATE, TEST) based on a ratio string.
|
|
799
793
|
- pd.DataFrame
|
800
794
|
|
801
795
|
• Example:
|
802
|
-
|
796
|
+
|
803
797
|
from rgwfuncs import append_xgb_labels
|
804
798
|
import pandas as pd
|
805
799
|
|
806
800
|
df = pd.DataFrame({'A': range(10)})
|
807
801
|
df_labeled = append_xgb_labels(df, "7:2:1")
|
808
802
|
print(df_labeled)
|
809
|
-
|
803
|
+
|
810
804
|
|
811
805
|
--------------------------------------------------------------------------------
|
812
806
|
|
@@ -825,7 +819,7 @@ Append XGB regression predictions to the DataFrame. Requires an `XGB_TYPE` colum
|
|
825
819
|
- pd.DataFrame
|
826
820
|
|
827
821
|
• Example:
|
828
|
-
|
822
|
+
|
829
823
|
from rgwfuncs import append_xgb_regression_predictions
|
830
824
|
import pandas as pd
|
831
825
|
|
@@ -838,7 +832,7 @@ Append XGB regression predictions to the DataFrame. Requires an `XGB_TYPE` colum
|
|
838
832
|
|
839
833
|
df_pred = append_xgb_regression_predictions(df, 'Target', 'Feature1,Feature2', 'PredictedTarget')
|
840
834
|
print(df_pred)
|
841
|
-
|
835
|
+
|
842
836
|
|
843
837
|
--------------------------------------------------------------------------------
|
844
838
|
|
@@ -857,7 +851,7 @@ Append XGB logistic regression predictions to the DataFrame. Requires an `XGB_TY
|
|
857
851
|
- pd.DataFrame
|
858
852
|
|
859
853
|
• Example:
|
860
|
-
|
854
|
+
|
861
855
|
from rgwfuncs import append_xgb_logistic_regression_predictions
|
862
856
|
import pandas as pd
|
863
857
|
|
@@ -870,7 +864,7 @@ Append XGB logistic regression predictions to the DataFrame. Requires an `XGB_TY
|
|
870
864
|
|
871
865
|
df_pred = append_xgb_logistic_regression_predictions(df, 'Target', 'Feature1,Feature2', 'PredictedTarget')
|
872
866
|
print(df_pred)
|
873
|
-
|
867
|
+
|
874
868
|
|
875
869
|
--------------------------------------------------------------------------------
|
876
870
|
|
@@ -884,13 +878,13 @@ Print the cascading frequency of top n values for specified columns.
|
|
884
878
|
- `order_by` (str): `ASC`, `DESC`, `FREQ_ASC`, `FREQ_DESC`.
|
885
879
|
|
886
880
|
• Example:
|
887
|
-
|
881
|
+
|
888
882
|
from rgwfuncs import print_n_frequency_cascading
|
889
883
|
import pandas as pd
|
890
884
|
|
891
885
|
df = pd.DataFrame({'City': ['NY','LA','NY','SF','LA','LA']})
|
892
886
|
print_n_frequency_cascading(df, 2, 'City', 'FREQ_DESC')
|
893
|
-
|
887
|
+
|
894
888
|
|
895
889
|
--------------------------------------------------------------------------------
|
896
890
|
|
@@ -904,13 +898,13 @@ Print the linear frequency of top n values for specified columns.
|
|
904
898
|
- `order_by` (str)
|
905
899
|
|
906
900
|
• Example:
|
907
|
-
|
901
|
+
|
908
902
|
from rgwfuncs import print_n_frequency_linear
|
909
903
|
import pandas as pd
|
910
904
|
|
911
905
|
df = pd.DataFrame({'City': ['NY','LA','NY','SF','LA','LA']})
|
912
906
|
print_n_frequency_linear(df, 2, 'City', 'FREQ_DESC')
|
913
|
-
|
907
|
+
|
914
908
|
|
915
909
|
--------------------------------------------------------------------------------
|
916
910
|
|
@@ -925,14 +919,14 @@ Retain specified columns in the DataFrame and drop the others.
|
|
925
919
|
- pd.DataFrame
|
926
920
|
|
927
921
|
• Example:
|
928
|
-
|
922
|
+
|
929
923
|
from rgwfuncs import retain_columns
|
930
924
|
import pandas as pd
|
931
925
|
|
932
926
|
df = pd.DataFrame({'A': [1,2], 'B': [3,4], 'C': [5,6]})
|
933
927
|
df_reduced = retain_columns(df, ['A','C'])
|
934
928
|
print(df_reduced)
|
935
|
-
|
929
|
+
|
936
930
|
|
937
931
|
--------------------------------------------------------------------------------
|
938
932
|
|
@@ -948,7 +942,7 @@ Retain only rows with common column values between two DataFrames.
|
|
948
942
|
- pd.DataFrame
|
949
943
|
|
950
944
|
• Example:
|
951
|
-
|
945
|
+
|
952
946
|
from rgwfuncs import mask_against_dataframe
|
953
947
|
import pandas as pd
|
954
948
|
|
@@ -957,7 +951,7 @@ Retain only rows with common column values between two DataFrames.
|
|
957
951
|
|
958
952
|
df_masked = mask_against_dataframe(df1, df2, 'ID')
|
959
953
|
print(df_masked)
|
960
|
-
|
954
|
+
|
961
955
|
|
962
956
|
--------------------------------------------------------------------------------
|
963
957
|
|
@@ -973,7 +967,7 @@ Retain only rows with uncommon column values between two DataFrames.
|
|
973
967
|
- pd.DataFrame
|
974
968
|
|
975
969
|
• Example:
|
976
|
-
|
970
|
+
|
977
971
|
from rgwfuncs import mask_against_dataframe_converse
|
978
972
|
import pandas as pd
|
979
973
|
|
@@ -982,20 +976,20 @@ Retain only rows with uncommon column values between two DataFrames.
|
|
982
976
|
|
983
977
|
df_uncommon = mask_against_dataframe_converse(df1, df2, 'ID')
|
984
978
|
print(df_uncommon)
|
985
|
-
|
979
|
+
|
986
980
|
|
987
981
|
--------------------------------------------------------------------------------
|
988
982
|
|
989
983
|
## Additional Info
|
990
984
|
|
991
985
|
For more information, refer to each function’s docstring by calling:
|
992
|
-
|
993
|
-
rgwfuncs.docs(method_type_filter='function_name')
|
994
|
-
|
986
|
+
|
987
|
+
rgwfuncs.docs(method_type_filter='function_name')
|
988
|
+
|
995
989
|
or display all docstrings with:
|
996
|
-
|
997
|
-
rgwfuncs.docs(method_type_filter='*')
|
998
|
-
|
990
|
+
|
991
|
+
rgwfuncs.docs(method_type_filter='*')
|
992
|
+
|
999
993
|
|
1000
994
|
--------------------------------------------------------------------------------
|
1001
995
|
|