imsciences 0.6.1.3__tar.gz → 0.6.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/PKG-INFO +1 -2
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/imsciences/datafunctions.py +123 -3
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/imsciences.egg-info/PKG-INFO +1 -2
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/setup.py +1 -1
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/README.md +0 -0
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/imsciences/__init__.py +0 -0
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.6.1.3 → imsciences-0.6.1.5}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: imsciences
|
|
3
|
-
Version: 0.6.1.3
|
|
3
|
+
Version: 0.6.1.5
|
|
4
4
|
Summary: IMS Data Processing Package
|
|
5
5
|
Author: IMS
|
|
6
6
|
Author-email: cam@im-sciences.com
|
|
@@ -12,7 +12,6 @@ Classifier: Operating System :: Unix
|
|
|
12
12
|
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
13
|
Classifier: Operating System :: Microsoft :: Windows
|
|
14
14
|
Description-Content-Type: text/markdown
|
|
15
|
-
Requires-Dist: pandas
|
|
16
15
|
|
|
17
16
|
# IMS Package Documentation
|
|
18
17
|
|
|
@@ -194,9 +194,29 @@ class dataprocessing:
|
|
|
194
194
|
|
|
195
195
|
print("\n34. Fill Weekly Missing Dates")
|
|
196
196
|
print(" - Description: Fill in any missing weeks with 0.")
|
|
197
|
-
print(" - Usage: fill_weekly_date_range(
|
|
197
|
+
print(" - Usage: fill_weekly_date_range(df, date_column, freq)")
|
|
198
198
|
print(" - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
|
|
199
|
-
|
|
199
|
+
|
|
200
|
+
print("\n35. Add Prefix and/or Suffix to Column Headers")
|
|
201
|
+
print(" - Description: Add Prefix and/or Suffix to Column Headers.")
|
|
202
|
+
print(" - Usage: add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)")
|
|
203
|
+
print(" - Example: add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')")
|
|
204
|
+
|
|
205
|
+
print("\n36. Change all data to dummies")
|
|
206
|
+
print(" - Description: Changes time series to 0s and 1s based off threshold")
|
|
207
|
+
print(" - Usage: create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')")
|
|
208
|
+
print(" - Example: create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')")
|
|
209
|
+
|
|
210
|
+
print("\n37. Replace substrings in column of strings")
|
|
211
|
+
print(" - Description: Replace substrings in column of strings based off dictionary, can also change column to lower")
|
|
212
|
+
print(" - Usage: replace_substrings(df, column, replacements, to_lower=False)")
|
|
213
|
+
print(" - Example: replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True)")
|
|
214
|
+
|
|
215
|
+
print("\n38. Add totals column")
|
|
216
|
+
print(" - Description: Sums all columns with the option to exclude an date column to create a total column")
|
|
217
|
+
print(" - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
|
|
218
|
+
print(" - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
|
|
219
|
+
|
|
200
220
|
def get_wd_levels(self, levels):
|
|
201
221
|
"""
|
|
202
222
|
Gets the current wd of whoever is working on it and gives the options to move the number of levels up.
|
|
@@ -1291,10 +1311,110 @@ class dataprocessing:
|
|
|
1291
1311
|
df_full.fillna(0, inplace=True)
|
|
1292
1312
|
|
|
1293
1313
|
return df_full
|
|
1314
|
+
|
|
1315
|
+
def add_prefix_and_suffix(self, df, prefix='', suffix='', date_col=None):
|
|
1316
|
+
"""
|
|
1317
|
+
Adds a specified prefix and/or suffix to the column names of a DataFrame. Optionally, a column (e.g., a date column) can be excluded.
|
|
1294
1318
|
|
|
1319
|
+
Args:
|
|
1320
|
+
df (pd.DataFrame): The DataFrame whose column names will be modified.
|
|
1321
|
+
prefix (str, optional): The prefix to add to each column name. Default is an empty string.
|
|
1322
|
+
suffix (str, optional): The suffix to add to each column name. Default is an empty string.
|
|
1323
|
+
date_col (str, optional): The name of the column to exclude from adding prefix and suffix, typically a date column. Default is None.
|
|
1295
1324
|
|
|
1325
|
+
Returns:
|
|
1326
|
+
pd.DataFrame: The DataFrame with updated column names.
|
|
1327
|
+
"""
|
|
1328
|
+
|
|
1329
|
+
# If there is no date column
|
|
1330
|
+
if date_col is None:
|
|
1331
|
+
# Add prefixes and suffixes to all columns
|
|
1332
|
+
df.columns = [prefix + col + suffix for col in df.columns]
|
|
1333
|
+
else:
|
|
1334
|
+
# Add prefixes and suffixes to all columns except the date column
|
|
1335
|
+
df.columns = [prefix + col + suffix if col != date_col else col for col in df.columns]
|
|
1336
|
+
|
|
1337
|
+
return df
|
|
1296
1338
|
|
|
1339
|
+
def create_dummies(self, df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total'):
|
|
1340
|
+
"""
|
|
1341
|
+
Creates dummy variables for the DataFrame, converting values greater than the threshold to 1 and others to 0.
|
|
1342
|
+
Optionally adds a total dummy column indicating whether any row contains at least one value greater than the threshold.
|
|
1343
|
+
|
|
1344
|
+
Args:
|
|
1345
|
+
df (pd.DataFrame): The DataFrame to process.
|
|
1346
|
+
date_col (str, optional): The column name to exclude from the dummy conversion, typically a date column. Default is None.
|
|
1347
|
+
dummy_threshold (int, optional): The threshold value; values greater than this become 1, others become 0. Default is 0.
|
|
1348
|
+
add_total_dummy_col (str, optional): If set to any value other than 'No', adds a column that contains the max value (1 or 0) for each row. Default is 'No'.
|
|
1349
|
+
total_col_name (str, optional): The name of the total column to add if add_total_dummy_col is not 'No'. Default is 'total'.
|
|
1350
|
+
|
|
1351
|
+
Returns:
|
|
1352
|
+
pd.DataFrame: The modified DataFrame with dummies applied and optional total column.
|
|
1353
|
+
"""
|
|
1354
|
+
|
|
1355
|
+
# If there is no date column
|
|
1356
|
+
if date_col is None:
|
|
1357
|
+
df = df.applymap(lambda x: 1 if x > dummy_threshold else 0)
|
|
1358
|
+
|
|
1359
|
+
if add_total_dummy_col != 'No':
|
|
1360
|
+
# Find max value of rows
|
|
1361
|
+
df[total_col_name] = df.max(axis=1)
|
|
1362
|
+
|
|
1363
|
+
# If there is a date column
|
|
1364
|
+
else:
|
|
1365
|
+
# Create dummies
|
|
1366
|
+
df.loc[:, df.columns != date_col] = df.loc[:, df.columns != date_col].applymap(lambda x: 1 if x > dummy_threshold else 0)
|
|
1297
1367
|
|
|
1368
|
+
if add_total_dummy_col != 'No':
|
|
1369
|
+
# Find max value of rows
|
|
1370
|
+
df[total_col_name] = df.loc[:, df.columns != date_col].max(axis=1)
|
|
1371
|
+
|
|
1372
|
+
return df
|
|
1373
|
+
|
|
1374
|
+
def replace_substrings(self, df, column, replacements, to_lower=False):
|
|
1375
|
+
"""
|
|
1376
|
+
Replaces substrings in a column of a DataFrame based on a dictionary of replacements.
|
|
1377
|
+
Optionally converts the column values to lowercase.
|
|
1378
|
+
|
|
1379
|
+
Args:
|
|
1380
|
+
df (pd.DataFrame): The DataFrame containing the column to modify.
|
|
1381
|
+
column (str): The column name where the replacements will be made.
|
|
1382
|
+
replacements (dict): A dictionary where keys are substrings to replace and values are the replacement strings.
|
|
1383
|
+
to_lower (bool, optional): If True, the column values will be converted to lowercase before applying replacements. Default is False.
|
|
1384
|
+
|
|
1385
|
+
Returns:
|
|
1386
|
+
pd.DataFrame: The DataFrame with the specified replacements made, and optionally with lowercase strings.
|
|
1387
|
+
"""
|
|
1388
|
+
|
|
1389
|
+
# Apply substring replacements
|
|
1390
|
+
for old, new in replacements.items():
|
|
1391
|
+
df[column] = df[column].str.replace(old, new, regex=False)
|
|
1392
|
+
|
|
1393
|
+
# Optionally convert to lowercase
|
|
1394
|
+
if to_lower:
|
|
1395
|
+
df[column] = df[column].str.lower()
|
|
1396
|
+
return df
|
|
1397
|
+
|
|
1398
|
+
def add_total_column(self, df, exclude_col=None, total_col_name='Total'):
|
|
1399
|
+
"""
|
|
1400
|
+
Adds a total column to a DataFrame by summing across all columns. Optionally excludes a specified column.
|
|
1401
|
+
|
|
1402
|
+
Args:
|
|
1403
|
+
df (pd.DataFrame): The DataFrame to modify.
|
|
1404
|
+
exclude_col (str, optional): The column name to exclude from the sum. Default is None.
|
|
1405
|
+
total_col_name (str, optional): The name of the new total column. Default is 'Total'.
|
|
1406
|
+
|
|
1407
|
+
Returns:
|
|
1408
|
+
pd.DataFrame: The DataFrame with an added total column.
|
|
1409
|
+
"""
|
|
1410
|
+
# If exclude_col is provided, drop that column before summing
|
|
1411
|
+
if exclude_col:
|
|
1412
|
+
df[total_col_name] = df.drop(columns=[exclude_col]).sum(axis=1)
|
|
1413
|
+
else:
|
|
1414
|
+
# Sum across all columns if exclude_col is not provided
|
|
1415
|
+
df[total_col_name] = df.sum(axis=1)
|
|
1416
|
+
|
|
1417
|
+
return df
|
|
1298
1418
|
|
|
1299
1419
|
|
|
1300
1420
|
|
|
@@ -2495,7 +2615,7 @@ class datapull:
|
|
|
2495
2615
|
|
|
2496
2616
|
return final_weather
|
|
2497
2617
|
|
|
2498
|
-
def pull_covid_data(folder_path: str, country: str = "GB", week_commencing: str = "mon") -> pd.DataFrame:
|
|
2618
|
+
def pull_covid_data(self, folder_path: str, country: str = "GB", week_commencing: str = "mon") -> pd.DataFrame:
|
|
2499
2619
|
"""
|
|
2500
2620
|
Get covid pandemic data for the country of interest.
|
|
2501
2621
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: imsciences
|
|
3
|
-
Version: 0.6.1.3
|
|
3
|
+
Version: 0.6.1.5
|
|
4
4
|
Summary: IMS Data Processing Package
|
|
5
5
|
Author: IMS
|
|
6
6
|
Author-email: cam@im-sciences.com
|
|
@@ -12,7 +12,6 @@ Classifier: Operating System :: Unix
|
|
|
12
12
|
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
13
|
Classifier: Operating System :: Microsoft :: Windows
|
|
14
14
|
Description-Content-Type: text/markdown
|
|
15
|
-
Requires-Dist: pandas
|
|
16
15
|
|
|
17
16
|
# IMS Package Documentation
|
|
18
17
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|