imsciences 0.6.1.4__py3-none-any.whl → 0.6.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +132 -2
- {imsciences-0.6.1.4.dist-info → imsciences-0.6.1.6.dist-info}/METADATA +1 -1
- {imsciences-0.6.1.4.dist-info → imsciences-0.6.1.6.dist-info}/RECORD +5 -5
- {imsciences-0.6.1.4.dist-info → imsciences-0.6.1.6.dist-info}/WHEEL +0 -0
- {imsciences-0.6.1.4.dist-info → imsciences-0.6.1.6.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
|
@@ -194,9 +194,29 @@ class dataprocessing:
|
|
|
194
194
|
|
|
195
195
|
print("\n34. Fill Weekly Missing Dates")
|
|
196
196
|
print(" - Description: Fill in any missing weeks with 0.")
|
|
197
|
-
print(" - Usage: fill_weekly_date_range(
|
|
197
|
+
print(" - Usage: fill_weekly_date_range(df, date_column, freq)")
|
|
198
198
|
print(" - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
|
|
199
|
-
|
|
199
|
+
|
|
200
|
+
print("\n35. Add Prefix and/or Suffix to Column Headers")
|
|
201
|
+
print(" - Description: Add Prefix and/or Suffix to Column Headers.")
|
|
202
|
+
print(" - Usage: add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)")
|
|
203
|
+
print(" - Example: add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')")
|
|
204
|
+
|
|
205
|
+
print("\n36. Change all data to dummies")
|
|
206
|
+
print(" - Description: Changes time series to 0s and 1s based off threshold")
|
|
207
|
+
print(" - Usage: create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')")
|
|
208
|
+
print(" - Example: create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')")
|
|
209
|
+
|
|
210
|
+
print("\n37. Replace substrings in column of strings")
|
|
211
|
+
print(" - Description: Replace substrings in column of strings based off dictionary, can also change column to lower")
|
|
212
|
+
print(" - Usage: replace_substrings(df, column, replacements, to_lower=False, new_column=None)")
|
|
213
|
+
print(" - Example: replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')")
|
|
214
|
+
|
|
215
|
+
print("\n38. Add totals column")
|
|
216
|
+
print(" - Description: Sums all columns with the option to exclude an date column to create a total column")
|
|
217
|
+
print(" - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
|
|
218
|
+
print(" - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
|
|
219
|
+
|
|
200
220
|
def get_wd_levels(self, levels):
|
|
201
221
|
"""
|
|
202
222
|
Gets the current wd of whoever is working on it and gives the options to move the number of levels up.
|
|
@@ -1291,10 +1311,120 @@ class dataprocessing:
|
|
|
1291
1311
|
df_full.fillna(0, inplace=True)
|
|
1292
1312
|
|
|
1293
1313
|
return df_full
|
|
1314
|
+
|
|
1315
|
+
def add_prefix_and_suffix(self, df, prefix='', suffix='', date_col=None):
    """
    Add a prefix and/or suffix to the column names of a DataFrame.

    The columns are renamed on the DataFrame that is passed in (its
    ``columns`` attribute is reassigned) and the same object is returned.

    Args:
        df (pd.DataFrame): The DataFrame whose column names will be modified.
        prefix (str, optional): Text placed in front of each column name. Default is an empty string.
        suffix (str, optional): Text placed after each column name. Default is an empty string.
        date_col (optional): Label of a single column to leave untouched, typically a date column.
            Default is None, meaning every column is renamed.

    Returns:
        pd.DataFrame: The same DataFrame, with updated column names.
    """
    # Labels are formatted rather than concatenated with ``+`` so that
    # non-string labels (e.g. the default integer labels 0, 1, 2, ...) are
    # rendered as text instead of raising TypeError.
    df.columns = [
        col if (date_col is not None and col == date_col) else f"{prefix}{col}{suffix}"
        for col in df.columns
    ]

    return df
|
|
1338
|
+
|
|
1339
|
+
def create_dummies(self, df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total'):
    """
    Convert the values of a DataFrame to 0/1 dummies based on a threshold.

    Values strictly greater than ``dummy_threshold`` become 1, everything
    else (including NaN, which never compares greater) becomes 0.
    Optionally adds a column holding the row-wise maximum of the dummies,
    i.e. 1 when at least one value in the row exceeded the threshold.

    Note on side effects: when ``date_col`` is None a new DataFrame is built
    and returned, leaving the input untouched; when ``date_col`` is given the
    input DataFrame is modified in place and returned.

    Args:
        df (pd.DataFrame): The DataFrame to process.
        date_col (str, optional): Column to exclude from the dummy conversion, typically a date column. Default is None.
        dummy_threshold (int, optional): Values greater than this become 1, others become 0. Default is 0.
        add_total_dummy_col (str, optional): Any value other than the exact string 'No' adds the
            row-maximum column. Default is 'No'.
        total_col_name (str, optional): Name of the row-maximum column. Default is 'total'.

    Returns:
        pd.DataFrame: The DataFrame with dummies applied and the optional total column.
    """
    # A vectorised comparison replaces the element-wise DataFrame.applymap,
    # which is deprecated in recent pandas releases.
    if date_col is None:
        # No column to protect: convert everything into a new frame.
        df = (df > dummy_threshold).astype(int)
        value_cols = df.columns
    else:
        # Convert every column except the date column, writing back into df.
        value_cols = df.columns[df.columns != date_col]
        if len(value_cols) > 0:
            df[value_cols] = (df[value_cols] > dummy_threshold).astype(int)

    if add_total_dummy_col != 'No':
        # Row-wise max of the 0/1 columns flags rows with any value above the threshold.
        df[total_col_name] = df[value_cols].max(axis=1)

    return df
|
|
1373
|
+
|
|
1374
|
+
def replace_substrings(self, df, column, replacements, to_lower=False, new_column=None):
    """
    Replace substrings in a string column of a DataFrame using a dictionary of replacements.

    Replacements are literal (not regular expressions) and are applied one
    after another in the dictionary's iteration order, so a later entry can
    act on text produced by an earlier one. The DataFrame passed in is
    modified and returned.

    Args:
        df (pd.DataFrame): The DataFrame containing the column to modify.
        column (str): The column whose values are read.
        replacements (dict): Maps each substring to find to the text that replaces it.
        to_lower (bool, optional): If True, values are converted to lowercase BEFORE the
            replacements are applied, so the dictionary keys should be lowercase. Default is False.
        new_column (str, optional): If provided, results are written to this column and
            ``column`` is left unchanged. If None, ``column`` is overwritten. Default is None.

    Returns:
        pd.DataFrame: The DataFrame with the replacements applied.
    """
    target_column = column if new_column is None else new_column
    values = df[column]

    # Lower-case first, as the documented contract promises; doing it after
    # the replacements would let mixed-case keys match and lowercase keys miss.
    if to_lower:
        values = values.str.lower()

    # Literal, sequential substring replacement.
    for old, new in replacements.items():
        values = values.str.replace(old, new, regex=False)

    df[target_column] = values

    return df
|
|
1295
1407
|
|
|
1408
|
+
def add_total_column(self, df, exclude_col=None, total_col_name='Total'):
    """
    Add a total column to a DataFrame by summing across the columns of each row.

    The total column is written onto the DataFrame that is passed in, which
    is also returned. If a column called ``total_col_name`` already exists it
    is left out of the sum and overwritten, so calling the function again
    does not add the previous total into the new one.

    Args:
        df (pd.DataFrame): The DataFrame to modify.
        exclude_col (str or list, optional): A column name, or a list/tuple/set of column
            names, to leave out of the sum (e.g. a date column). Default is None,
            meaning nothing is excluded.
        total_col_name (str, optional): The name of the total column. Default is 'Total'.

    Returns:
        pd.DataFrame: The DataFrame with the total column added.
    """
    # Normalise exclude_col to a list of labels; falsy values mean "exclude nothing".
    if not exclude_col:
        excluded = []
    elif isinstance(exclude_col, (list, tuple, set)):
        excluded = list(exclude_col)
    else:
        excluded = [exclude_col]

    # Never feed an existing total back into itself.
    if total_col_name in df.columns and total_col_name not in excluded:
        excluded.append(total_col_name)

    df[total_col_name] = df.drop(columns=excluded).sum(axis=1)

    return df
|
|
1298
1428
|
|
|
1299
1429
|
|
|
1300
1430
|
|
|
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
|
2
2
|
dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
3
3
|
dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
4
4
|
imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
|
|
5
|
-
imsciences/datafunctions.py,sha256=
|
|
5
|
+
imsciences/datafunctions.py,sha256=1DOieL8Xfh6I-5JZlM_XKPwIon-I_VcDjppuvXmhYhk,137236
|
|
6
6
|
imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
|
|
7
7
|
imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
8
8
|
imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
9
9
|
imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
10
10
|
imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
11
|
-
imsciences-0.6.1.
|
|
12
|
-
imsciences-0.6.1.
|
|
13
|
-
imsciences-0.6.1.
|
|
14
|
-
imsciences-0.6.1.
|
|
11
|
+
imsciences-0.6.1.6.dist-info/METADATA,sha256=SbdVxObVs6UW90bJ0eQIAbX1rd0urpG6sNmcxiB5uLw,854
|
|
12
|
+
imsciences-0.6.1.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
13
|
+
imsciences-0.6.1.6.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
|
|
14
|
+
imsciences-0.6.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|