imsciences 0.6.1.4__py3-none-any.whl → 0.6.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -194,9 +194,29 @@ class dataprocessing:
194
194
 
195
195
  print("\n34. Fill Weekly Missing Dates")
196
196
  print(" - Description: Fill in any missing weeks with 0.")
197
- print(" - Usage: fill_weekly_date_range(self, df, date_column, freq)")
197
+ print(" - Usage: fill_weekly_date_range(df, date_column, freq)")
198
198
  print(" - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
199
-
199
+
200
+ print("\n35. Add Prefix and/or Suffix to Column Headers")
201
+ print(" - Description: Add Prefix and/or Suffix to Column Headers.")
202
+ print(" - Usage: add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)")
203
+ print(" - Example: add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')")
204
+
205
+ print("\n36. Change all data to dummies")
206
+ print(" - Description: Changes time series to 0s and 1s based off threshold")
207
+ print(" - Usage: create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')")
208
+ print(" - Example: create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')")
209
+
210
+ print("\n37. Replace substrings in column of strings")
211
+ print(" - Description: Replace substrings in column of strings based off dictionary, can also change column to lower")
212
+ print(" - Usage: replace_substrings(df, column, replacements, to_lower=False, new_column=None)")
213
+ print(" - Example: replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')")
214
+
215
+ print("\n38. Add totals column")
216
+ print(" - Description: Sums all columns with the option to exclude an date column to create a total column")
217
+ print(" - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
218
+ print(" - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
219
+
200
220
  def get_wd_levels(self, levels):
201
221
  """
202
222
  Gets the current wd of whoever is working on it and gives the options to move the number of levels up.
@@ -1291,10 +1311,120 @@ class dataprocessing:
1291
1311
  df_full.fillna(0, inplace=True)
1292
1312
 
1293
1313
  return df_full
1314
+
1315
def add_prefix_and_suffix(self, df, prefix='', suffix='', date_col=None):
    """
    Add a prefix and/or suffix to the column names of a DataFrame.

    The rename is applied in place: ``df.columns`` is reassigned and the same
    DataFrame object is returned.

    Args:
        df (pd.DataFrame): The DataFrame whose column names will be modified.
        prefix (str, optional): Text placed in front of each column name. Default is an empty string.
        suffix (str, optional): Text placed after each column name. Default is an empty string.
        date_col (str, optional): Label of a single column to leave untouched, typically a date column.
            Default is None, meaning every column is renamed.

    Returns:
        pd.DataFrame: ``df`` itself, with updated column names. Renamed labels are always strings;
        the excluded column keeps its original label unchanged.
    """

    def _rename(col):
        # The excluded column is passed through exactly as it is.
        if date_col is not None and col == date_col:
            return col
        # Format rather than concatenate so that non-string labels
        # (e.g. integer or year columns) do not raise a TypeError.
        return f"{prefix}{col}{suffix}"

    df.columns = [_rename(col) for col in df.columns]

    return df
1338
+
1339
def create_dummies(self, df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total'):
    """
    Convert the values of a DataFrame to 0/1 dummies based on a threshold.

    Every value strictly greater than ``dummy_threshold`` becomes 1; everything else
    (including NaN, which never compares greater) becomes 0. Optionally adds a column
    flagging the rows in which at least one dummy is 1.

    Args:
        df (pd.DataFrame): The DataFrame to process.
        date_col (str, optional): The column name to exclude from the dummy conversion, typically a date column. Default is None.
        dummy_threshold (int, optional): The threshold value; values greater than this become 1, others become 0. Default is 0.
        add_total_dummy_col (str, optional): If set to any value other than 'No', adds a column that contains the max value (1 or 0) for each row. Default is 'No'.
        total_col_name (str, optional): The name of the total column to add if add_total_dummy_col is not 'No'. Default is 'total'.

    Returns:
        pd.DataFrame: The DataFrame with integer dummies applied and the optional total column.
        When ``date_col`` is given the input ``df`` is modified in place and returned;
        when it is None a new DataFrame is returned and the input is left untouched.
    """

    if date_col is None:
        value_cols = list(df.columns)
        # Vectorized comparison replaces the deprecated per-cell DataFrame.applymap.
        # This builds a new frame, so the caller's DataFrame is not modified.
        df = (df > dummy_threshold).astype('int64')
    else:
        value_cols = [col for col in df.columns if col != date_col]
        # Assign whole columns (rather than .loc[:, mask] = ...) so the dummies are
        # stored as integers instead of being cast back to the original float dtype.
        df[value_cols] = (df[value_cols] > dummy_threshold).astype('int64')

    if add_total_dummy_col != 'No':
        # Row-wise max of 0/1 dummies is 1 when any value column exceeded the threshold.
        df[total_col_name] = df[value_cols].max(axis=1)

    return df
1373
+
1374
def replace_substrings(self, df, column, replacements, to_lower=False, new_column=None):
    """
    Replace substrings in a string column of a DataFrame using a dictionary of replacements.

    Replacements are applied as literal (non-regex) substitutions, in the dictionary's
    iteration order. If ``to_lower`` is True the result is lowercased AFTER all
    replacements have been applied, so the dictionary keys must match the original casing.

    Args:
        df (pd.DataFrame): The DataFrame containing the column to modify. It is modified in place.
        column (str): The column name whose values are read.
        replacements (dict): A dictionary where keys are substrings to replace and values are the replacement strings.
        to_lower (bool, optional): If True, the resulting values are converted to lowercase after the replacements. Default is False.
        new_column (str, optional): If provided, the result is written to this column and ``column`` is left unchanged.
            If None, ``column`` itself is overwritten. Default is None.

    Returns:
        pd.DataFrame: ``df`` itself, with the replaced (and optionally lowercased) values written to the target column.
    """

    # Build the result on a standalone Series and write it back once at the end,
    # so a failure part-way through (e.g. a non-string column) does not leave a
    # half-populated new column behind on the caller's DataFrame.
    values = df[column]

    # Apply substring replacements
    for old, new in replacements.items():
        values = values.str.replace(old, new, regex=False)

    # Optionally convert to lowercase
    if to_lower:
        values = values.str.lower()

    target_column = column if new_column is None else new_column
    df[target_column] = values

    return df
1295
1407
 
1408
def add_total_column(self, df, exclude_col=None, total_col_name='Total'):
    """
    Add a total column to a DataFrame by summing across the columns of each row.

    The new column is written onto ``df`` in place and the same object is returned.

    Args:
        df (pd.DataFrame): The DataFrame to modify.
        exclude_col (str, optional): The column name to exclude from the sum, typically a date column.
            Default is None, meaning all columns are summed.
        total_col_name (str, optional): The name of the new total column. Default is 'Total'.

    Returns:
        pd.DataFrame: The DataFrame with an added total column.

    Raises:
        KeyError: If a non-empty ``exclude_col`` is given that is not a column of ``df``.
    """
    # Decide on "was a column given?" rather than on bare truthiness, so that a real
    # column whose label is falsy (e.g. the integer 0) is still excluded. Falsy values
    # that are not column labels (such as '') keep meaning "no exclusion".
    has_exclusion = exclude_col is not None and bool(exclude_col or exclude_col in df.columns)

    if has_exclusion:
        # Drop the excluded column before summing
        df[total_col_name] = df.drop(columns=[exclude_col]).sum(axis=1)
    else:
        # Sum across all columns if exclude_col is not provided
        df[total_col_name] = df.sum(axis=1)

    return df
1298
1428
 
1299
1429
 
1300
1430
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.1.4
3
+ Version: 0.6.1.6
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
2
2
  dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
4
  imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
5
- imsciences/datafunctions.py,sha256=IsF_pSdisE1KSPnpHkS3JTxtiBGBiS1vXLisDRgKgRQ,129899
5
+ imsciences/datafunctions.py,sha256=1DOieL8Xfh6I-5JZlM_XKPwIon-I_VcDjppuvXmhYhk,137236
6
6
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
7
7
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
8
8
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
9
9
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
10
10
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
11
- imsciences-0.6.1.4.dist-info/METADATA,sha256=7qeNA6PXkl3QWfDizajpaIbOswxgoqu3BAcY0FP9GU8,854
12
- imsciences-0.6.1.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
- imsciences-0.6.1.4.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
- imsciences-0.6.1.4.dist-info/RECORD,,
11
+ imsciences-0.6.1.6.dist-info/METADATA,sha256=SbdVxObVs6UW90bJ0eQIAbX1rd0urpG6sNmcxiB5uLw,854
12
+ imsciences-0.6.1.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
+ imsciences-0.6.1.6.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
+ imsciences-0.6.1.6.dist-info/RECORD,,