imsciences 0.6.1.3__tar.gz → 0.6.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.1.3
3
+ Version: 0.6.1.5
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -12,7 +12,6 @@ Classifier: Operating System :: Unix
12
12
  Classifier: Operating System :: MacOS :: MacOS X
13
13
  Classifier: Operating System :: Microsoft :: Windows
14
14
  Description-Content-Type: text/markdown
15
- Requires-Dist: pandas
16
15
 
17
16
  # IMS Package Documentation
18
17
 
@@ -194,9 +194,29 @@ class dataprocessing:
194
194
 
195
195
  print("\n34. Fill Weekly Missing Dates")
196
196
  print(" - Description: Fill in any missing weeks with 0.")
197
- print(" - Usage: fill_weekly_date_range(self, df, date_column, freq)")
197
+ print(" - Usage: fill_weekly_date_range(df, date_column, freq)")
198
198
  print(" - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
199
-
199
+
200
+ print("\n35. Add Prefix and/or Suffix to Column Headers")
201
+ print(" - Description: Add Prefix and/or Suffix to Column Headers.")
202
+ print(" - Usage: add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)")
203
+ print(" - Example: add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')")
204
+
205
+ print("\n36. Change all data to dummies")
206
+ print(" - Description: Changes time series to 0s and 1s based off threshold")
207
+ print(" - Usage: create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')")
208
+ print(" - Example: create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')")
209
+
210
+ print("\n37. Replace substrings in column of strings")
211
+ print(" - Description: Replace substrings in column of strings based off dictionary, can also change column to lower")
212
+ print(" - Usage: replace_substrings(df, column, replacements, to_lower=False)")
213
+ print(" - Example: replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True)")
214
+
215
+ print("\n38. Add totals column")
216
+ print(" - Description: Sums all columns with the option to exclude an date column to create a total column")
217
+ print(" - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
218
+ print(" - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
219
+
200
220
  def get_wd_levels(self, levels):
201
221
  """
202
222
  Gets the current wd of whoever is working on it and gives the options to move the number of levels up.
@@ -1291,10 +1311,110 @@ class dataprocessing:
1291
1311
  df_full.fillna(0, inplace=True)
1292
1312
 
1293
1313
  return df_full
1314
+
1315
def add_prefix_and_suffix(self, df, prefix='', suffix='', date_col=None):
    """
    Add a prefix and/or suffix to the column names of a DataFrame.

    The rename is applied in place on ``df`` (its ``columns`` attribute is
    reassigned) and the same object is returned, so existing callers that
    ignore the return value keep working.

    Args:
        df (pd.DataFrame): The DataFrame whose column names will be modified.
        prefix (str, optional): Text prepended to each column name. Default is an empty string.
        suffix (str, optional): Text appended to each column name. Default is an empty string.
        date_col (str, optional): Name of a column to leave untouched, typically a date column.
            Default is None, meaning every column is renamed.

    Returns:
        pd.DataFrame: The same DataFrame with updated column names.
    """
    # Column labels are not guaranteed to be strings (e.g. integer headers
    # from a header-less read), so coerce with str() before concatenating;
    # plain `prefix + col + suffix` raises TypeError for such labels.
    # When date_col is None the exclusion test is skipped entirely, which
    # keeps the "rename everything" behaviour of the no-date case.
    df.columns = [
        col if (date_col is not None and col == date_col) else f"{prefix}{col}{suffix}"
        for col in df.columns
    ]
    return df
1296
1338
 
1339
def create_dummies(self, df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total'):
    """
    Convert the values of a DataFrame to 0/1 dummies based on a threshold.

    Values strictly greater than ``dummy_threshold`` become 1, everything else
    (including missing values) becomes 0. Optionally adds a total dummy column
    that is 1 when any value column in the row is 1.

    Args:
        df (pd.DataFrame): The DataFrame to process.
        date_col (str, optional): Column to exclude from the conversion, typically a date column.
            Default is None.
        dummy_threshold (int, optional): Values greater than this become 1, others 0. Default is 0.
        add_total_dummy_col (str or bool, optional): 'No' (any letter case) or a falsy
            non-string value such as False/None skips the total column; any other value
            adds it. Default is 'No'.
        total_col_name (str, optional): Name of the total column, used only when it is added.
            Default is 'total'.

    Returns:
        pd.DataFrame: The DataFrame with integer dummies and the optional total column.
        When ``date_col`` is None a new DataFrame is returned; otherwise ``df`` itself is
        updated in place and returned.
    """
    # The flag was compared with `!= 'No'`, which turned False, None and 'no'
    # into "yes". Parse it so the obvious spellings of "no" all mean no.
    if isinstance(add_total_dummy_col, str):
        want_total = add_total_dummy_col.strip().lower() != 'no'
    else:
        want_total = bool(add_total_dummy_col)

    if date_col is None:
        # Vectorised comparison instead of the deprecated per-cell applymap;
        # NaN > threshold is False, so missing values map to 0 as before.
        result = (df > dummy_threshold).astype(int)
        value_cols = list(result.columns)
    else:
        result = df
        value_cols = [col for col in df.columns if col != date_col]
        if value_cols:
            # Assign whole columns rather than writing through .loc so the
            # dummies are stored as integers even when the source was float.
            result[value_cols] = (df[value_cols] > dummy_threshold).astype(int)

    if want_total:
        # Row-wise max over the dummies: 1 if any value column fired.
        result[total_col_name] = result[value_cols].max(axis=1)

    return result
1373
+
1374
def replace_substrings(self, df, column, replacements, to_lower=False):
    """
    Replace substrings in a string column of a DataFrame using a dictionary.

    The column is updated in place on ``df`` and the same DataFrame is returned.

    Args:
        df (pd.DataFrame): The DataFrame containing the column to modify.
        column (str): The column name where the replacements will be made.
        replacements (dict): Maps each substring to replace (key) to its replacement (value).
            Keys are matched literally, not as regular expressions, and are applied in
            dictionary order.
        to_lower (bool, optional): If True, the column values are converted to lowercase
            before the replacements are applied, so keys should be given in lowercase.
            Default is False.

    Returns:
        pd.DataFrame: The DataFrame with the specified replacements made, and optionally
        with lowercase strings.
    """
    values = df[column]

    # Lowercase first, as the contract above states. Previously this ran after
    # the replacements, so lowercase keys never matched mixed-case data.
    if to_lower:
        values = values.str.lower()

    # Work on a local Series and write back once instead of reassigning the
    # DataFrame column on every iteration.
    for old, new in replacements.items():
        values = values.str.replace(old, new, regex=False)

    df[column] = values
    return df
1397
+
1398
def add_total_column(self, df, exclude_col=None, total_col_name='Total'):
    """
    Add a total column to a DataFrame by summing across its columns row by row.

    The total is written onto ``df`` itself, which is also returned.

    Args:
        df (pd.DataFrame): The DataFrame to modify.
        exclude_col (label or list of labels, optional): Column(s) to leave out of the sum,
            typically a date column. None or an empty string means nothing is excluded.
            Default is None.
        total_col_name (str, optional): The name of the new total column. Default is 'Total'.

    Returns:
        pd.DataFrame: The DataFrame with an added (or refreshed) total column.

    Raises:
        KeyError: If an excluded column is not present in ``df``.
    """
    # Test against None/'' explicitly: a plain truthiness check would ignore
    # valid but falsy labels such as the integer column 0.
    if exclude_col is None or (isinstance(exclude_col, str) and exclude_col == ''):
        skipped = []
    elif isinstance(exclude_col, list):
        skipped = list(exclude_col)
    else:
        skipped = [exclude_col]

    addends = df.drop(columns=skipped) if skipped else df

    # If the total column already exists (e.g. the function is run twice),
    # keep it out of the sum so the totals are not double counted.
    addends = addends.drop(columns=[total_col_name], errors='ignore')

    df[total_col_name] = addends.sum(axis=1)
    return df
1298
1418
 
1299
1419
 
1300
1420
 
@@ -2495,7 +2615,7 @@ class datapull:
2495
2615
 
2496
2616
  return final_weather
2497
2617
 
2498
- def pull_covid_data(folder_path: str, country: str = "GB", week_commencing: str = "mon") -> pd.DataFrame:
2618
+ def pull_covid_data(self, folder_path: str, country: str = "GB", week_commencing: str = "mon") -> pd.DataFrame:
2499
2619
  """
2500
2620
  Get covid pandemic data for the country of interest.
2501
2621
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.1.3
3
+ Version: 0.6.1.5
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -12,7 +12,6 @@ Classifier: Operating System :: Unix
12
12
  Classifier: Operating System :: MacOS :: MacOS X
13
13
  Classifier: Operating System :: Microsoft :: Windows
14
14
  Description-Content-Type: text/markdown
15
- Requires-Dist: pandas
16
15
 
17
16
  # IMS Package Documentation
18
17
 
@@ -8,7 +8,7 @@ def read_md(file_name):
8
8
  return f.read()
9
9
  return ''
10
10
 
11
- VERSION = '0.6.1.3'
11
+ VERSION = '0.6.1.5'
12
12
  DESCRIPTION = 'IMS Data Processing Package'
13
13
  LONG_DESCRIPTION = read_md('README.md') # Reading from README.md
14
14
 
File without changes
File without changes