rgwfuncs 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {rgwfuncs-0.0.3.dist-info → rgwfuncs-0.0.4.dist-info}/METADATA +89 -93
- rgwfuncs-0.0.4.dist-info/RECORD +8 -0
- rgwfuncs-0.0.3.dist-info/RECORD +0 -8
- {rgwfuncs-0.0.3.dist-info → rgwfuncs-0.0.4.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.3.dist-info → rgwfuncs-0.0.4.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.3.dist-info → rgwfuncs-0.0.4.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.3.dist-info → rgwfuncs-0.0.4.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.3
|
3
|
+
Version: 0.0.4
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -37,33 +37,29 @@ This library provides a variety of functions for manipulating and analyzing pand
|
|
37
37
|
## Installation
|
38
38
|
|
39
39
|
Install the package using:
|
40
|
-
|
40
|
+
bash
|
41
41
|
pip install rgwfuncs
|
42
|
-
|
42
|
+
|
43
43
|
|
44
44
|
--------------------------------------------------------------------------------
|
45
45
|
|
46
46
|
## Basic Usage
|
47
47
|
|
48
48
|
Import the library:
|
49
|
-
|
49
|
+
|
50
50
|
import rgwfuncs
|
51
|
-
```
|
52
51
|
|
53
52
|
View available function docstrings in alphabetical order:
|
54
|
-
|
53
|
+
|
55
54
|
rgwfuncs.docs()
|
56
|
-
```
|
57
55
|
|
58
56
|
View specific docstrings by providing a filter (comma-separated). For example, to display docstrings about "numeric_clean":
|
59
|
-
|
57
|
+
|
60
58
|
rgwfuncs.docs(method_type_filter='numeric_clean')
|
61
|
-
```
|
62
59
|
|
63
60
|
To display all docstrings, use:
|
64
|
-
|
61
|
+
|
65
62
|
rgwfuncs.docs(method_type_filter='*')
|
66
|
-
```
|
67
63
|
|
68
64
|
--------------------------------------------------------------------------------
|
69
65
|
|
@@ -124,14 +120,14 @@ Limit the DataFrame to a specified number of rows.
|
|
124
120
|
- pd.DataFrame: A new DataFrame limited to the specified number of rows.
|
125
121
|
|
126
122
|
• Example:
|
127
|
-
|
123
|
+
|
128
124
|
from rgwfuncs import limit_dataframe
|
129
125
|
import pandas as pd
|
130
126
|
|
131
127
|
df = pd.DataFrame({'A': range(10), 'B': range(10, 20)})
|
132
128
|
df_limited = limit_dataframe(df, 5)
|
133
129
|
print(df_limited)
|
134
|
-
|
130
|
+
|
135
131
|
--------------------------------------------------------------------------------
|
136
132
|
|
137
133
|
### 4. `from_raw_data`
|
@@ -145,7 +141,7 @@ Create a DataFrame from raw data.
|
|
145
141
|
- pd.DataFrame: A DataFrame created from the raw data.
|
146
142
|
|
147
143
|
• Example:
|
148
|
-
|
144
|
+
|
149
145
|
from rgwfuncs import from_raw_data
|
150
146
|
|
151
147
|
headers = ["Name", "Age"]
|
@@ -157,7 +153,7 @@ Create a DataFrame from raw data.
|
|
157
153
|
|
158
154
|
df = from_raw_data(headers, data)
|
159
155
|
print(df)
|
160
|
-
|
156
|
+
|
161
157
|
--------------------------------------------------------------------------------
|
162
158
|
|
163
159
|
### 5. `append_rows`
|
@@ -171,7 +167,7 @@ Append rows to the DataFrame.
|
|
171
167
|
- pd.DataFrame: A new DataFrame with appended rows.
|
172
168
|
|
173
169
|
• Example:
|
174
|
-
|
170
|
+
|
175
171
|
from rgwfuncs import append_rows
|
176
172
|
import pandas as pd
|
177
173
|
|
@@ -182,7 +178,7 @@ Append rows to the DataFrame.
|
|
182
178
|
]
|
183
179
|
df_appended = append_rows(df, new_rows)
|
184
180
|
print(df_appended)
|
185
|
-
|
181
|
+
|
186
182
|
--------------------------------------------------------------------------------
|
187
183
|
|
188
184
|
### 6. `append_columns`
|
@@ -196,14 +192,14 @@ Append new columns to the DataFrame with None values.
|
|
196
192
|
- pd.DataFrame: A new DataFrame with the new columns appended.
|
197
193
|
|
198
194
|
• Example:
|
199
|
-
|
195
|
+
|
200
196
|
from rgwfuncs import append_columns
|
201
197
|
import pandas as pd
|
202
198
|
|
203
199
|
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25]})
|
204
200
|
df_new = append_columns(df, ['Salary', 'Department'])
|
205
201
|
print(df_new)
|
206
|
-
|
202
|
+
|
207
203
|
--------------------------------------------------------------------------------
|
208
204
|
|
209
205
|
### 7. `update_rows`
|
@@ -218,14 +214,14 @@ Update specific rows in the DataFrame based on a condition.
|
|
218
214
|
- pd.DataFrame: A new DataFrame with updated rows.
|
219
215
|
|
220
216
|
• Example:
|
221
|
-
|
217
|
+
|
222
218
|
from rgwfuncs import update_rows
|
223
219
|
import pandas as pd
|
224
220
|
|
225
221
|
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25]})
|
226
222
|
df_updated = update_rows(df, "Name == 'Alice'", {'Age': 31})
|
227
223
|
print(df_updated)
|
228
|
-
|
224
|
+
|
229
225
|
--------------------------------------------------------------------------------
|
230
226
|
|
231
227
|
### 8. `delete_rows`
|
@@ -239,14 +235,14 @@ Delete rows from the DataFrame based on a condition.
|
|
239
235
|
- pd.DataFrame: The DataFrame with specified rows deleted.
|
240
236
|
|
241
237
|
• Example:
|
242
|
-
|
238
|
+
|
243
239
|
from rgwfuncs import delete_rows
|
244
240
|
import pandas as pd
|
245
241
|
|
246
242
|
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25]})
|
247
243
|
df_deleted = delete_rows(df, "Age < 28")
|
248
244
|
print(df_deleted)
|
249
|
-
|
245
|
+
|
250
246
|
--------------------------------------------------------------------------------
|
251
247
|
|
252
248
|
### 9. `drop_duplicates`
|
@@ -259,14 +255,14 @@ Drop duplicate rows in the DataFrame, retaining the first occurrence.
|
|
259
255
|
- pd.DataFrame: A new DataFrame with duplicates removed.
|
260
256
|
|
261
257
|
• Example:
|
262
|
-
|
258
|
+
|
263
259
|
from rgwfuncs import drop_duplicates
|
264
260
|
import pandas as pd
|
265
261
|
|
266
262
|
df = pd.DataFrame({'A': [1,1,2,2], 'B': [3,3,4,4]})
|
267
263
|
df_no_dupes = drop_duplicates(df)
|
268
264
|
print(df_no_dupes)
|
269
|
-
|
265
|
+
|
270
266
|
--------------------------------------------------------------------------------
|
271
267
|
|
272
268
|
### 10. `drop_duplicates_retain_first`
|
@@ -280,14 +276,14 @@ Drop duplicate rows based on specified columns, retaining the first occurrence.
|
|
280
276
|
- pd.DataFrame: A new DataFrame with duplicates removed.
|
281
277
|
|
282
278
|
• Example:
|
283
|
-
|
279
|
+
|
284
280
|
from rgwfuncs import drop_duplicates_retain_first
|
285
281
|
import pandas as pd
|
286
282
|
|
287
283
|
df = pd.DataFrame({'A': [1,1,2,2], 'B': [3,3,4,4]})
|
288
284
|
df_no_dupes = drop_duplicates_retain_first(df, 'A')
|
289
285
|
print(df_no_dupes)
|
290
|
-
|
286
|
+
|
291
287
|
--------------------------------------------------------------------------------
|
292
288
|
|
293
289
|
### 11. `drop_duplicates_retain_last`
|
@@ -301,14 +297,14 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
|
|
301
297
|
- pd.DataFrame: A new DataFrame with duplicates removed.
|
302
298
|
|
303
299
|
• Example:
|
304
|
-
|
300
|
+
|
305
301
|
from rgwfuncs import drop_duplicates_retain_last
|
306
302
|
import pandas as pd
|
307
303
|
|
308
304
|
df = pd.DataFrame({'A': [1,1,2,2], 'B': [3,3,4,4]})
|
309
305
|
df_no_dupes = drop_duplicates_retain_last(df, 'A')
|
310
306
|
print(df_no_dupes)
|
311
|
-
|
307
|
+
|
312
308
|
|
313
309
|
--------------------------------------------------------------------------------
|
314
310
|
|
@@ -324,7 +320,7 @@ Load data from a database query into a DataFrame based on a configuration preset
|
|
324
320
|
- pd.DataFrame: A DataFrame containing the query result.
|
325
321
|
|
326
322
|
• Example:
|
327
|
-
|
323
|
+
|
328
324
|
from rgwfuncs import load_data_from_query
|
329
325
|
|
330
326
|
df = load_data_from_query(
|
@@ -333,7 +329,7 @@ Load data from a database query into a DataFrame based on a configuration preset
|
|
333
329
|
config_file_name="rgwml.config"
|
334
330
|
)
|
335
331
|
print(df)
|
336
|
-
|
332
|
+
|
337
333
|
|
338
334
|
--------------------------------------------------------------------------------
|
339
335
|
|
@@ -347,12 +343,12 @@ Load data from a file into a DataFrame based on the file extension.
|
|
347
343
|
- pd.DataFrame: A DataFrame containing the loaded data.
|
348
344
|
|
349
345
|
• Example:
|
350
|
-
|
346
|
+
|
351
347
|
from rgwfuncs import load_data_from_path
|
352
348
|
|
353
349
|
df = load_data_from_path("/absolute/path/to/data.csv")
|
354
350
|
print(df)
|
355
|
-
|
351
|
+
|
356
352
|
|
357
353
|
--------------------------------------------------------------------------------
|
358
354
|
|
@@ -367,12 +363,12 @@ Execute a query on a SQLite database file and return the results as a DataFrame.
|
|
367
363
|
- pd.DataFrame: A DataFrame containing the query results.
|
368
364
|
|
369
365
|
• Example:
|
370
|
-
|
366
|
+
|
371
367
|
from rgwfuncs import load_data_from_sqlite_path
|
372
368
|
|
373
369
|
df = load_data_from_sqlite_path("/path/to/database.db", "SELECT * FROM my_table")
|
374
370
|
print(df)
|
375
|
-
|
371
|
+
|
376
372
|
|
377
373
|
--------------------------------------------------------------------------------
|
378
374
|
|
@@ -384,13 +380,13 @@ Display the first n rows of the DataFrame (prints out in dictionary format).
|
|
384
380
|
- n (int): Number of rows to display.
|
385
381
|
|
386
382
|
• Example:
|
387
|
-
|
383
|
+
|
388
384
|
from rgwfuncs import first_n_rows
|
389
385
|
import pandas as pd
|
390
386
|
|
391
387
|
df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})
|
392
388
|
first_n_rows(df, 2)
|
393
|
-
|
389
|
+
|
394
390
|
|
395
391
|
--------------------------------------------------------------------------------
|
396
392
|
|
@@ -402,13 +398,13 @@ Display the last n rows of the DataFrame (prints out in dictionary format).
|
|
402
398
|
- n (int): Number of rows to display.
|
403
399
|
|
404
400
|
• Example:
|
405
|
-
|
401
|
+
|
406
402
|
from rgwfuncs import last_n_rows
|
407
403
|
import pandas as pd
|
408
404
|
|
409
405
|
df = pd.DataFrame({'A': [1,2,3,4,5], 'B': [6,7,8,9,10]})
|
410
406
|
last_n_rows(df, 2)
|
411
|
-
|
407
|
+
|
412
408
|
|
413
409
|
--------------------------------------------------------------------------------
|
414
410
|
|
@@ -421,13 +417,13 @@ Print the top n unique values for specified columns in the DataFrame.
|
|
421
417
|
- columns (list): List of columns for which to display top unique values.
|
422
418
|
|
423
419
|
• Example:
|
424
|
-
|
420
|
+
|
425
421
|
from rgwfuncs import top_n_unique_values
|
426
422
|
import pandas as pd
|
427
423
|
|
428
424
|
df = pd.DataFrame({'Cities': ['NY', 'LA', 'NY', 'SF', 'LA', 'LA']})
|
429
425
|
top_n_unique_values(df, 2, ['Cities'])
|
430
|
-
|
426
|
+
|
431
427
|
|
432
428
|
--------------------------------------------------------------------------------
|
433
429
|
|
@@ -440,13 +436,13 @@ Print the bottom n unique values for specified columns in the DataFrame.
|
|
440
436
|
- columns (list)
|
441
437
|
|
442
438
|
• Example:
|
443
|
-
|
439
|
+
|
444
440
|
from rgwfuncs import bottom_n_unique_values
|
445
441
|
import pandas as pd
|
446
442
|
|
447
443
|
df = pd.DataFrame({'Cities': ['NY', 'LA', 'NY', 'SF', 'LA', 'LA']})
|
448
444
|
bottom_n_unique_values(df, 1, ['Cities'])
|
449
|
-
|
445
|
+
|
450
446
|
|
451
447
|
--------------------------------------------------------------------------------
|
452
448
|
|
@@ -458,7 +454,7 @@ Print correlation for multiple pairs of columns in the DataFrame.
|
|
458
454
|
- `column_pairs` (list of tuples): E.g., `[('col1','col2'), ('colA','colB')]`.
|
459
455
|
|
460
456
|
• Example:
|
461
|
-
|
457
|
+
|
462
458
|
from rgwfuncs import print_correlation
|
463
459
|
import pandas as pd
|
464
460
|
|
@@ -471,7 +467,7 @@ Print correlation for multiple pairs of columns in the DataFrame.
|
|
471
467
|
|
472
468
|
pairs = [('col1','col2'), ('colA','colB')]
|
473
469
|
print_correlation(df, pairs)
|
474
|
-
|
470
|
+
|
475
471
|
|
476
472
|
--------------------------------------------------------------------------------
|
477
473
|
|
@@ -482,13 +478,13 @@ Print the memory usage of the DataFrame in megabytes.
|
|
482
478
|
- df (pd.DataFrame)
|
483
479
|
|
484
480
|
• Example:
|
485
|
-
|
481
|
+
|
486
482
|
from rgwfuncs import print_memory_usage
|
487
483
|
import pandas as pd
|
488
484
|
|
489
485
|
df = pd.DataFrame({'A': range(1000)})
|
490
486
|
print_memory_usage(df)
|
491
|
-
|
487
|
+
|
492
488
|
|
493
489
|
--------------------------------------------------------------------------------
|
494
490
|
|
@@ -503,7 +499,7 @@ Return a new DataFrame filtered by a given query expression.
|
|
503
499
|
- pd.DataFrame
|
504
500
|
|
505
501
|
• Example:
|
506
|
-
|
502
|
+
|
507
503
|
from rgwfuncs import filter_dataframe
|
508
504
|
import pandas as pd
|
509
505
|
|
@@ -514,7 +510,7 @@ Return a new DataFrame filtered by a given query expression.
|
|
514
510
|
|
515
511
|
df_filtered = filter_dataframe(df, "Age > 23")
|
516
512
|
print(df_filtered)
|
517
|
-
|
513
|
+
|
518
514
|
|
519
515
|
--------------------------------------------------------------------------------
|
520
516
|
|
@@ -529,14 +525,14 @@ Filter and return rows containing valid Indian mobile numbers in the specified c
|
|
529
525
|
- pd.DataFrame
|
530
526
|
|
531
527
|
• Example:
|
532
|
-
|
528
|
+
|
533
529
|
from rgwfuncs import filter_indian_mobiles
|
534
530
|
import pandas as pd
|
535
531
|
|
536
532
|
df = pd.DataFrame({'Phone': ['9876543210', '12345', '7000012345']})
|
537
533
|
df_indian = filter_indian_mobiles(df, 'Phone')
|
538
534
|
print(df_indian)
|
539
|
-
|
535
|
+
|
540
536
|
|
541
537
|
--------------------------------------------------------------------------------
|
542
538
|
|
@@ -548,13 +544,13 @@ Print the entire DataFrame and its column types. Optionally print a source path.
|
|
548
544
|
- source (str, optional)
|
549
545
|
|
550
546
|
• Example:
|
551
|
-
|
547
|
+
|
552
548
|
from rgwfuncs import print_dataframe
|
553
549
|
import pandas as pd
|
554
550
|
|
555
551
|
df = pd.DataFrame({'Name': ['Alice'], 'Age': [30]})
|
556
552
|
print_dataframe(df, source='SampleData.csv')
|
557
|
-
|
553
|
+
|
558
554
|
|
559
555
|
--------------------------------------------------------------------------------
|
560
556
|
|
@@ -569,7 +565,7 @@ Send a DataFrame via Telegram using a specified bot configuration.
|
|
569
565
|
- `remove_after_send` (bool)
|
570
566
|
|
571
567
|
• Example:
|
572
|
-
|
568
|
+
|
573
569
|
from rgwfuncs import send_dataframe_via_telegram
|
574
570
|
|
575
571
|
# Suppose your bot config is in "rgwml.config" under [TelegramBots] section
|
@@ -581,7 +577,7 @@ Send a DataFrame via Telegram using a specified bot configuration.
|
|
581
577
|
as_file=True,
|
582
578
|
remove_after_send=True
|
583
579
|
)
|
584
|
-
|
580
|
+
|
585
581
|
|
586
582
|
--------------------------------------------------------------------------------
|
587
583
|
|
@@ -598,7 +594,7 @@ Send an email with an optional DataFrame attachment using the Gmail API via a sp
|
|
598
594
|
- `remove_after_send` (bool)
|
599
595
|
|
600
596
|
• Example:
|
601
|
-
|
597
|
+
|
602
598
|
from rgwfuncs import send_data_to_email
|
603
599
|
|
604
600
|
df = ... # Some DataFrame
|
@@ -611,7 +607,7 @@ Send an email with an optional DataFrame attachment using the Gmail API via a sp
|
|
611
607
|
as_file=True,
|
612
608
|
remove_after_send=True
|
613
609
|
)
|
614
|
-
|
610
|
+
|
615
611
|
|
616
612
|
--------------------------------------------------------------------------------
|
617
613
|
|
@@ -626,7 +622,7 @@ Send a DataFrame or message to Slack using a specified bot configuration.
|
|
626
622
|
- `remove_after_send` (bool)
|
627
623
|
|
628
624
|
• Example:
|
629
|
-
|
625
|
+
|
630
626
|
from rgwfuncs import send_data_to_slack
|
631
627
|
|
632
628
|
df = ... # Some DataFrame
|
@@ -637,7 +633,7 @@ Send a DataFrame or message to Slack using a specified bot configuration.
|
|
637
633
|
as_file=True,
|
638
634
|
remove_after_send=True
|
639
635
|
)
|
640
|
-
|
636
|
+
|
641
637
|
|
642
638
|
--------------------------------------------------------------------------------
|
643
639
|
|
@@ -652,14 +648,14 @@ Reorder the columns of a DataFrame based on a string input.
|
|
652
648
|
- pd.DataFrame
|
653
649
|
|
654
650
|
• Example:
|
655
|
-
|
651
|
+
|
656
652
|
from rgwfuncs import order_columns
|
657
653
|
import pandas as pd
|
658
654
|
|
659
655
|
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25], 'Salary': [1000, 1200]})
|
660
656
|
df_reordered = order_columns(df, 'Salary,Name,Age')
|
661
657
|
print(df_reordered)
|
662
|
-
|
658
|
+
|
663
659
|
|
664
660
|
--------------------------------------------------------------------------------
|
665
661
|
|
@@ -676,14 +672,14 @@ Append a ranged classification column to the DataFrame.
|
|
676
672
|
- pd.DataFrame
|
677
673
|
|
678
674
|
• Example:
|
679
|
-
|
675
|
+
|
680
676
|
from rgwfuncs import append_ranged_classification_column
|
681
677
|
import pandas as pd
|
682
678
|
|
683
679
|
df = pd.DataFrame({'Scores': [5, 12, 25]})
|
684
680
|
df_classified = append_ranged_classification_column(df, '0-10,11-20,21-30', 'Scores', 'ScoreRange')
|
685
681
|
print(df_classified)
|
686
|
-
|
682
|
+
|
687
683
|
|
688
684
|
--------------------------------------------------------------------------------
|
689
685
|
|
@@ -700,14 +696,14 @@ Append a percentile classification column to the DataFrame.
|
|
700
696
|
- pd.DataFrame
|
701
697
|
|
702
698
|
• Example:
|
703
|
-
|
699
|
+
|
704
700
|
from rgwfuncs import append_percentile_classification_column
|
705
701
|
import pandas as pd
|
706
702
|
|
707
703
|
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
|
708
704
|
df_classified = append_percentile_classification_column(df, '25,50,75', 'Values', 'ValuePercentile')
|
709
705
|
print(df_classified)
|
710
|
-
|
706
|
+
|
711
707
|
|
712
708
|
--------------------------------------------------------------------------------
|
713
709
|
|
@@ -724,7 +720,7 @@ Append a ranged date classification column to the DataFrame.
|
|
724
720
|
- pd.DataFrame
|
725
721
|
|
726
722
|
• Example:
|
727
|
-
|
723
|
+
|
728
724
|
from rgwfuncs import append_ranged_date_classification_column
|
729
725
|
import pandas as pd
|
730
726
|
|
@@ -736,7 +732,7 @@ Append a ranged date classification column to the DataFrame.
|
|
736
732
|
'DateRange'
|
737
733
|
)
|
738
734
|
print(df_classified)
|
739
|
-
|
735
|
+
|
740
736
|
|
741
737
|
--------------------------------------------------------------------------------
|
742
738
|
|
@@ -751,14 +747,14 @@ Rename columns in the DataFrame.
|
|
751
747
|
- pd.DataFrame
|
752
748
|
|
753
749
|
• Example:
|
754
|
-
|
750
|
+
|
755
751
|
from rgwfuncs import rename_columns
|
756
752
|
import pandas as pd
|
757
753
|
|
758
754
|
df = pd.DataFrame({'OldName': [1,2,3]})
|
759
755
|
df_renamed = rename_columns(df, {'OldName': 'NewName'})
|
760
756
|
print(df_renamed)
|
761
|
-
|
757
|
+
|
762
758
|
|
763
759
|
--------------------------------------------------------------------------------
|
764
760
|
|
@@ -773,7 +769,7 @@ Cascade sort the DataFrame by specified columns and order.
|
|
773
769
|
- pd.DataFrame
|
774
770
|
|
775
771
|
• Example:
|
776
|
-
|
772
|
+
|
777
773
|
from rgwfuncs import cascade_sort
|
778
774
|
import pandas as pd
|
779
775
|
|
@@ -784,7 +780,7 @@ Cascade sort the DataFrame by specified columns and order.
|
|
784
780
|
|
785
781
|
sorted_df = cascade_sort(df, ["Name::ASC", "Age::DESC"])
|
786
782
|
print(sorted_df)
|
787
|
-
|
783
|
+
|
788
784
|
|
789
785
|
--------------------------------------------------------------------------------
|
790
786
|
|
@@ -799,14 +795,14 @@ Append XGB training labels (TRAIN, VALIDATE, TEST) based on a ratio string.
|
|
799
795
|
- pd.DataFrame
|
800
796
|
|
801
797
|
• Example:
|
802
|
-
|
798
|
+
|
803
799
|
from rgwfuncs import append_xgb_labels
|
804
800
|
import pandas as pd
|
805
801
|
|
806
802
|
df = pd.DataFrame({'A': range(10)})
|
807
803
|
df_labeled = append_xgb_labels(df, "7:2:1")
|
808
804
|
print(df_labeled)
|
809
|
-
|
805
|
+
|
810
806
|
|
811
807
|
--------------------------------------------------------------------------------
|
812
808
|
|
@@ -825,7 +821,7 @@ Append XGB regression predictions to the DataFrame. Requires an `XGB_TYPE` colum
|
|
825
821
|
- pd.DataFrame
|
826
822
|
|
827
823
|
• Example:
|
828
|
-
|
824
|
+
|
829
825
|
from rgwfuncs import append_xgb_regression_predictions
|
830
826
|
import pandas as pd
|
831
827
|
|
@@ -838,7 +834,7 @@ Append XGB regression predictions to the DataFrame. Requires an `XGB_TYPE` colum
|
|
838
834
|
|
839
835
|
df_pred = append_xgb_regression_predictions(df, 'Target', 'Feature1,Feature2', 'PredictedTarget')
|
840
836
|
print(df_pred)
|
841
|
-
|
837
|
+
|
842
838
|
|
843
839
|
--------------------------------------------------------------------------------
|
844
840
|
|
@@ -857,7 +853,7 @@ Append XGB logistic regression predictions to the DataFrame. Requires an `XGB_TY
|
|
857
853
|
- pd.DataFrame
|
858
854
|
|
859
855
|
• Example:
|
860
|
-
|
856
|
+
|
861
857
|
from rgwfuncs import append_xgb_logistic_regression_predictions
|
862
858
|
import pandas as pd
|
863
859
|
|
@@ -870,7 +866,7 @@ Append XGB logistic regression predictions to the DataFrame. Requires an `XGB_TY
|
|
870
866
|
|
871
867
|
df_pred = append_xgb_logistic_regression_predictions(df, 'Target', 'Feature1,Feature2', 'PredictedTarget')
|
872
868
|
print(df_pred)
|
873
|
-
|
869
|
+
|
874
870
|
|
875
871
|
--------------------------------------------------------------------------------
|
876
872
|
|
@@ -884,13 +880,13 @@ Print the cascading frequency of top n values for specified columns.
|
|
884
880
|
- `order_by` (str): `ASC`, `DESC`, `FREQ_ASC`, `FREQ_DESC`.
|
885
881
|
|
886
882
|
• Example:
|
887
|
-
|
883
|
+
|
888
884
|
from rgwfuncs import print_n_frequency_cascading
|
889
885
|
import pandas as pd
|
890
886
|
|
891
887
|
df = pd.DataFrame({'City': ['NY','LA','NY','SF','LA','LA']})
|
892
888
|
print_n_frequency_cascading(df, 2, 'City', 'FREQ_DESC')
|
893
|
-
|
889
|
+
|
894
890
|
|
895
891
|
--------------------------------------------------------------------------------
|
896
892
|
|
@@ -904,13 +900,13 @@ Print the linear frequency of top n values for specified columns.
|
|
904
900
|
- `order_by` (str)
|
905
901
|
|
906
902
|
• Example:
|
907
|
-
|
903
|
+
|
908
904
|
from rgwfuncs import print_n_frequency_linear
|
909
905
|
import pandas as pd
|
910
906
|
|
911
907
|
df = pd.DataFrame({'City': ['NY','LA','NY','SF','LA','LA']})
|
912
908
|
print_n_frequency_linear(df, 2, 'City', 'FREQ_DESC')
|
913
|
-
|
909
|
+
|
914
910
|
|
915
911
|
--------------------------------------------------------------------------------
|
916
912
|
|
@@ -925,14 +921,14 @@ Retain specified columns in the DataFrame and drop the others.
|
|
925
921
|
- pd.DataFrame
|
926
922
|
|
927
923
|
• Example:
|
928
|
-
|
924
|
+
|
929
925
|
from rgwfuncs import retain_columns
|
930
926
|
import pandas as pd
|
931
927
|
|
932
928
|
df = pd.DataFrame({'A': [1,2], 'B': [3,4], 'C': [5,6]})
|
933
929
|
df_reduced = retain_columns(df, ['A','C'])
|
934
930
|
print(df_reduced)
|
935
|
-
|
931
|
+
|
936
932
|
|
937
933
|
--------------------------------------------------------------------------------
|
938
934
|
|
@@ -948,7 +944,7 @@ Retain only rows with common column values between two DataFrames.
|
|
948
944
|
- pd.DataFrame
|
949
945
|
|
950
946
|
• Example:
|
951
|
-
|
947
|
+
|
952
948
|
from rgwfuncs import mask_against_dataframe
|
953
949
|
import pandas as pd
|
954
950
|
|
@@ -957,7 +953,7 @@ Retain only rows with common column values between two DataFrames.
|
|
957
953
|
|
958
954
|
df_masked = mask_against_dataframe(df1, df2, 'ID')
|
959
955
|
print(df_masked)
|
960
|
-
|
956
|
+
|
961
957
|
|
962
958
|
--------------------------------------------------------------------------------
|
963
959
|
|
@@ -973,7 +969,7 @@ Retain only rows with uncommon column values between two DataFrames.
|
|
973
969
|
- pd.DataFrame
|
974
970
|
|
975
971
|
• Example:
|
976
|
-
|
972
|
+
|
977
973
|
from rgwfuncs import mask_against_dataframe_converse
|
978
974
|
import pandas as pd
|
979
975
|
|
@@ -982,20 +978,20 @@ Retain only rows with uncommon column values between two DataFrames.
|
|
982
978
|
|
983
979
|
df_uncommon = mask_against_dataframe_converse(df1, df2, 'ID')
|
984
980
|
print(df_uncommon)
|
985
|
-
|
981
|
+
|
986
982
|
|
987
983
|
--------------------------------------------------------------------------------
|
988
984
|
|
989
985
|
## Additional Info
|
990
986
|
|
991
987
|
For more information, refer to each function’s docstring by calling:
|
992
|
-
|
993
|
-
rgwfuncs.docs(method_type_filter='function_name')
|
994
|
-
|
988
|
+
|
989
|
+
rgwfuncs.docs(method_type_filter='function_name')
|
990
|
+
|
995
991
|
or display all docstrings with:
|
996
|
-
|
997
|
-
rgwfuncs.docs(method_type_filter='*')
|
998
|
-
|
992
|
+
|
993
|
+
rgwfuncs.docs(method_type_filter='*')
|
994
|
+
|
999
995
|
|
1000
996
|
--------------------------------------------------------------------------------
|
1001
997
|
|
@@ -0,0 +1,8 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=8suLAGE7rHBY9e2ViUJuRCUyiam4PO7bjNq_l59dW8Q,24
|
2
|
+
rgwfuncs/df_lib.py,sha256=vkPOg0acDUwEYbyELNZ4OTJ9cHu9MbZaC4quN8XWtQY,63202
|
3
|
+
rgwfuncs-0.0.4.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
4
|
+
rgwfuncs-0.0.4.dist-info/METADATA,sha256=FYC3hXPd-Zhyx-CcAX7MwCXaMKAehfMF9ac9Gy5RZ4w,26561
|
5
|
+
rgwfuncs-0.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
+
rgwfuncs-0.0.4.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
7
|
+
rgwfuncs-0.0.4.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
8
|
+
rgwfuncs-0.0.4.dist-info/RECORD,,
|
rgwfuncs-0.0.3.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=8suLAGE7rHBY9e2ViUJuRCUyiam4PO7bjNq_l59dW8Q,24
|
2
|
-
rgwfuncs/df_lib.py,sha256=vkPOg0acDUwEYbyELNZ4OTJ9cHu9MbZaC4quN8XWtQY,63202
|
3
|
-
rgwfuncs-0.0.3.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
4
|
-
rgwfuncs-0.0.3.dist-info/METADATA,sha256=Ay0Wq8YsjTBh2Sl1jrf8UZh9vACtBxpL4OWWqn6HUwg,26862
|
5
|
-
rgwfuncs-0.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
-
rgwfuncs-0.0.3.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
7
|
-
rgwfuncs-0.0.3.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
8
|
-
rgwfuncs-0.0.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|