imsciences 0.6.0.3__tar.gz → 0.6.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences-0.6.0.5/PKG-INFO +23 -0
- imsciences-0.6.0.5/README.md +8 -0
- {imsciences-0.6.0.3 → imsciences-0.6.0.5}/imsciences/datafunctions.py +32 -4
- imsciences-0.6.0.5/imsciences.egg-info/PKG-INFO +23 -0
- {imsciences-0.6.0.3 → imsciences-0.6.0.5}/setup.py +1 -1
- imsciences-0.6.0.3/PKG-INFO +0 -175
- imsciences-0.6.0.3/README.md +0 -160
- imsciences-0.6.0.3/imsciences.egg-info/PKG-INFO +0 -175
- {imsciences-0.6.0.3 → imsciences-0.6.0.5}/imsciences/__init__.py +0 -0
- {imsciences-0.6.0.3 → imsciences-0.6.0.5}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.6.0.3 → imsciences-0.6.0.5}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.6.0.3 → imsciences-0.6.0.5}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.6.0.3 → imsciences-0.6.0.5}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.6.0.3 → imsciences-0.6.0.5}/setup.cfg +0 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: imsciences
|
|
3
|
+
Version: 0.6.0.5
|
|
4
|
+
Summary: IMS Data Processing Package
|
|
5
|
+
Author: IMS
|
|
6
|
+
Author-email: cam@im-sciences.com
|
|
7
|
+
Keywords: python,data processing
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Operating System :: Unix
|
|
12
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# IMS Package Documentation
|
|
17
|
+
|
|
18
|
+
The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
|
|
19
|
+
|
|
20
|
+
## Data Processing
|
|
21
|
+
|
|
22
|
+
## Data Pulling
|
|
23
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# IMS Package Documentation
|
|
2
|
+
|
|
3
|
+
The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
|
|
4
|
+
|
|
5
|
+
## Data Processing
|
|
6
|
+
|
|
7
|
+
## Data Pulling
|
|
8
|
+
|
|
@@ -189,7 +189,12 @@ class dataprocessing:
|
|
|
189
189
|
print("\n33. Convert Mixed Formats Dates")
|
|
190
190
|
print(" - Description: Convert a mix of US and UK dates to datetime.")
|
|
191
191
|
print(" - Usage: convert_mixed_formats_dates(df, datecol)")
|
|
192
|
-
print(" - Example: convert_mixed_formats_dates(df, 'OBS')")
|
|
192
|
+
print(" - Example: convert_mixed_formats_dates(df, 'OBS')")
|
|
193
|
+
|
|
194
|
+
print("\n34. Fill Weekly Missing Dates")
|
|
195
|
+
print(" - Description: Fill in any missing weeks with 0.")
|
|
196
|
+
print(" - Usage: fill_weekly_date_range(self, df, date_column, freq)")
|
|
197
|
+
print(" - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
|
|
193
198
|
|
|
194
199
|
def get_wd_levels(self, levels):
|
|
195
200
|
"""
|
|
@@ -893,7 +898,7 @@ class dataprocessing:
|
|
|
893
898
|
start_year = int(starting_FY[2:])
|
|
894
899
|
|
|
895
900
|
def calculate_FY_vectorized(date_series):
|
|
896
|
-
years_since_start = ((date_series - start_date).dt.days /
|
|
901
|
+
years_since_start = ((date_series - start_date).dt.days / 364).astype(int)
|
|
897
902
|
fy = 'FY' + (start_year + years_since_start).astype(str)
|
|
898
903
|
if short_format == "Yes":
|
|
899
904
|
fy = 'FY' + fy.str[-2:]
|
|
@@ -902,8 +907,14 @@ class dataprocessing:
|
|
|
902
907
|
df['FY'] = calculate_FY_vectorized(df[index_col])
|
|
903
908
|
|
|
904
909
|
if half_years == "Yes" or combined_FY_and_H == "Yes":
|
|
905
|
-
|
|
906
|
-
|
|
910
|
+
def calculate_half_year_vectorized(date_series):
|
|
911
|
+
fy_years_since_start = ((date_series - start_date).dt.days / 364).astype(int)
|
|
912
|
+
fy_start_dates = start_date + fy_years_since_start * pd.DateOffset(years=1)
|
|
913
|
+
fy_end_of_h1 = fy_start_dates + pd.DateOffset(weeks=26) - pd.DateOffset(weeks=1)
|
|
914
|
+
half_year = np.where(date_series <= fy_end_of_h1, 'H1', 'H2')
|
|
915
|
+
return half_year
|
|
916
|
+
|
|
917
|
+
df['Half Years'] = calculate_half_year_vectorized(df[index_col])
|
|
907
918
|
|
|
908
919
|
if combined_FY_and_H == "Yes":
|
|
909
920
|
df['Financial Half Years'] = df['FY'] + ' ' + df['Half Years']
|
|
@@ -1262,6 +1273,23 @@ class dataprocessing:
|
|
|
1262
1273
|
df[column_name] = corrected_dates
|
|
1263
1274
|
return df
|
|
1264
1275
|
|
|
1276
|
+
def fill_weekly_date_range(self, df, date_column, freq='W-MON'):
|
|
1277
|
+
# Ensure the date column is in datetime format
|
|
1278
|
+
df[date_column] = pd.to_datetime(df[date_column])
|
|
1279
|
+
|
|
1280
|
+
# Generate the full date range with the specified frequency
|
|
1281
|
+
full_date_range = pd.date_range(start=df[date_column].min(), end=df[date_column].max(), freq=freq)
|
|
1282
|
+
|
|
1283
|
+
# Create a new dataframe with the full date range
|
|
1284
|
+
full_date_df = pd.DataFrame({date_column: full_date_range})
|
|
1285
|
+
|
|
1286
|
+
# Merge the original dataframe with the new full date range dataframe
|
|
1287
|
+
df_full = full_date_df.merge(df, on=date_column, how='left')
|
|
1288
|
+
|
|
1289
|
+
# Fill missing values with 0
|
|
1290
|
+
df_full.fillna(0, inplace=True)
|
|
1291
|
+
|
|
1292
|
+
return df_full
|
|
1265
1293
|
|
|
1266
1294
|
|
|
1267
1295
|
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: imsciences
|
|
3
|
+
Version: 0.6.0.5
|
|
4
|
+
Summary: IMS Data Processing Package
|
|
5
|
+
Author: IMS
|
|
6
|
+
Author-email: cam@im-sciences.com
|
|
7
|
+
Keywords: python,data processing
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Operating System :: Unix
|
|
12
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# IMS Package Documentation
|
|
17
|
+
|
|
18
|
+
The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
|
|
19
|
+
|
|
20
|
+
## Data Processing
|
|
21
|
+
|
|
22
|
+
## Data Pulling
|
|
23
|
+
|
imsciences-0.6.0.3/PKG-INFO
DELETED
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: imsciences
|
|
3
|
-
Version: 0.6.0.3
|
|
4
|
-
Summary: IMS Data Processing Package
|
|
5
|
-
Author: IMS
|
|
6
|
-
Author-email: cam@im-sciences.com
|
|
7
|
-
Keywords: python,data processing
|
|
8
|
-
Classifier: Development Status :: 3 - Alpha
|
|
9
|
-
Classifier: Intended Audience :: Developers
|
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Operating System :: Unix
|
|
12
|
-
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
|
-
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
-
Description-Content-Type: text/markdown
|
|
15
|
-
|
|
16
|
-
# IMS Package Documentation
|
|
17
|
-
|
|
18
|
-
The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
|
|
19
|
-
|
|
20
|
-
## Data Processing
|
|
21
|
-
|
|
22
|
-
### 1. `get_wd_levels(levels)`
|
|
23
|
-
- **Description**: Get the working directory with the option of moving up parents.
|
|
24
|
-
- **Usage**: `get_wd_levels(levels)`
|
|
25
|
-
|
|
26
|
-
### 2. `remove_rows(data_frame, num_rows_to_remove)`
|
|
27
|
-
- **Description**: Removes a specified number of rows from a pandas DataFrame.
|
|
28
|
-
- **Usage**: `remove_rows(data_frame, num_rows_to_remove)`
|
|
29
|
-
|
|
30
|
-
### 3. `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
31
|
-
- **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week. In the long format.
|
|
32
|
-
- **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
33
|
-
|
|
34
|
-
### 4. `convert_monthly_to_daily(df, date_column)`
|
|
35
|
-
- **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
|
|
36
|
-
- **Usage**: `convert_monthly_to_daily(df, date_column)`
|
|
37
|
-
|
|
38
|
-
### 5. `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
|
|
39
|
-
- **Description**: Plots specified columns from two different DataFrames using a shared date column. Useful for comparing data.
|
|
40
|
-
- **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
|
|
41
|
-
|
|
42
|
-
### 6. `remove_nan_rows(df, col_to_remove_rows)`
|
|
43
|
-
- **Description**: Removes rows from a DataFrame where the specified column has NaN values.
|
|
44
|
-
- **Usage**: `remove_nan_rows(df, col_to_remove_rows)`
|
|
45
|
-
|
|
46
|
-
### 7. `filter_rows(df, col_to_filter, list_of_filters)`
|
|
47
|
-
- **Description**: Filters the DataFrame based on whether the values in a specified column are in a provided list.
|
|
48
|
-
- **Usage**: `filter_rows(df, col_to_filter, list_of_filters)`
|
|
49
|
-
|
|
50
|
-
### 8. `plot_one(df1, col1, date_column)`
|
|
51
|
-
- **Description**: Plots a specified column from a DataFrame.
|
|
52
|
-
- **Usage**: `plot_one(df1, col1, date_column)`
|
|
53
|
-
|
|
54
|
-
### 9. `week_of_year_mapping(df, week_col, start_day_str)`
|
|
55
|
-
- **Description**: Converts a week column in 'yyyy-Www' or 'yyyy-ww' format to week commencing date.
|
|
56
|
-
- **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
|
|
57
|
-
|
|
58
|
-
### 10. `exclude_rows(df, col_to_filter, list_of_filters)`
|
|
59
|
-
- **Description**: Removes rows from a DataFrame based on whether the values in a specified column are not in a provided list.
|
|
60
|
-
- **Usage**: `exclude_rows(df, col_to_filter, list_of_filters)`
|
|
61
|
-
|
|
62
|
-
### 11. `rename_cols(df, cols_to_rename)`
|
|
63
|
-
- **Description**: Renames columns in a pandas DataFrame.
|
|
64
|
-
- **Usage**: `rename_cols(df, cols_to_rename)`
|
|
65
|
-
|
|
66
|
-
### 12. `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
|
|
67
|
-
- **Description**: Creates a new DataFrame with two columns: one for dates and one for merged numeric values.
|
|
68
|
-
- **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
|
|
69
|
-
|
|
70
|
-
### 13. `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
|
|
71
|
-
- **Description**: Merge a list of DataFrames on a common column.
|
|
72
|
-
- **Usage**: `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
|
|
73
|
-
|
|
74
|
-
### 14. `merge_and_update_dfs(df1, df2, key_column)`
|
|
75
|
-
- **Description**: Merges two dataframes on a key column, updates the first dataframe's columns with the second's where available, and returns a dataframe sorted by the key column.
|
|
76
|
-
- **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
|
|
77
|
-
|
|
78
|
-
### 15. `convert_us_to_uk_dates(df, date_col)`
|
|
79
|
-
- **Description**: Convert a DataFrame column with mixed date formats to datetime.
|
|
80
|
-
- **Usage**: `convert_us_to_uk_dates(df, date_col)`
|
|
81
|
-
|
|
82
|
-
### 16. `combine_sheets(all_sheets)`
|
|
83
|
-
- **Description**: Combines multiple DataFrames from a dictionary into a single DataFrame.
|
|
84
|
-
- **Usage**: `combine_sheets({'Sheet1': df1, 'Sheet2': df2})`
|
|
85
|
-
|
|
86
|
-
### 17. `pivot_table(df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)`
|
|
87
|
-
- **Description**: Dynamically pivots a DataFrame based on specified columns.
|
|
88
|
-
- **Usage**: `pivot_table(df, {'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, 'OBS', 'Channel Short Names', 'Value', fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)`
|
|
89
|
-
|
|
90
|
-
### 18. `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_country_dict='Other'), new_column_name='Mapping')`
|
|
91
|
-
- **Description**: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.
|
|
92
|
-
- **Usage**: `classify_within_column(df, ['campaign type','media type'], {'France Paid Social FB|paid social': 'facebook','France Paid Social TW|paid social': 'twitter'}, 'other','mapping')`
|
|
93
|
-
|
|
94
|
-
### 19. `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
95
|
-
- **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week. In the wide format.
|
|
96
|
-
- **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
97
|
-
|
|
98
|
-
### 20. `merge_cols_with_seperator(self, df, col_names,seperator='_',output_column_name = 'Merged',starting_prefix_str=None,ending_prefix_str=None)`
|
|
99
|
-
- **Description**: Merge multiple columns in a dataframe into 1 column with a seperator.Can be used if multiple columns are needed for a LUT.
|
|
100
|
-
- **Usage**: `merge_cols_with_seperator(df, ['Campaign','Product'],seperator='|','Merged Columns',starting_prefix_str='start_',ending_prefix_str='_end')`
|
|
101
|
-
|
|
102
|
-
### 21. `check_sum_of_df_cols_are_equal(df_1,df_2,cols_1,cols_2)`
|
|
103
|
-
- **Description**: Checks if the sum of two columns in two dataframes are the same, and provides the sums of each column and the difference between them.
|
|
104
|
-
- **Usage**: `check_sum_of_df_cols_are_equal(df_1,df_2,'Media Cost','Spend')`
|
|
105
|
-
|
|
106
|
-
### 22. `convert_2_df_cols_to_dict(df, key_col, value_col)`
|
|
107
|
-
- **Description**: Can be used to create an LUT. Creates a dictionary using two columns in a dataframe.
|
|
108
|
-
- **Usage**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`
|
|
109
|
-
|
|
110
|
-
### 23. `create_FY_and_H_columns(df, index_col, start_date, starting_FY,short_format='No',half_years='No',combined_FY_and_H='No')`
|
|
111
|
-
- **Description**: Used to create a financial year, half year, and financial half year column.
|
|
112
|
-
- **Usage**: `create_FY_and_H_columns(df, 'Week (M-S)', '2022-10-03', 'FY2023',short_format='Yes',half_years='Yes',combined_FY_and_H='Yes')`
|
|
113
|
-
|
|
114
|
-
### 24. `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict,output_column_name='Updated Column')`
|
|
115
|
-
- **Description**: Essentially provides an if statement with a xlookup if a value is something. Updates certain chosen values in a specified column of the DataFrame based on a lookup dictionary.
|
|
116
|
-
- **Usage**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel','segment','product'], qlik_dict_for_channel,output_column_name='Channel New')`
|
|
117
|
-
|
|
118
|
-
### 25. `create_new_version_of_col_using_LUT(df, keys_col,value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
|
|
119
|
-
- **Description**: Creates a new column in a dataframe, which takes an old column and uses a lookup table to changes values in the new column to reflect the lookup table. The lookup is based on a column in the dataframe.
|
|
120
|
-
- **Usage**: `keyword_lookup_replacement(df, '*Campaign Name','Campaign Type',search_campaign_name_retag_lut,'Campaign Name New')`
|
|
121
|
-
|
|
122
|
-
### 26. `convert_df_wide_2_long(df,value_cols,variable_col_name='Stacked',value_col_name='Value')`
|
|
123
|
-
- **Description**: Changes a dataframe from wide to long format.
|
|
124
|
-
- **Usage**: `keyword_lookup_replacement(df, ['Media Cost','Impressions','Clicks'],variable_col_name='Metric')`
|
|
125
|
-
|
|
126
|
-
### 27. `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
|
|
127
|
-
- **Description**: Allows the capability to manually update any cell in dataframe by applying filters and chosing a column to edit in dataframe.
|
|
128
|
-
- **Usage**: `keyword_lookup_replacement(df, {'OBS':' <= datetime(2023,1,23)','File_Name':' == 'France media''},'Master Include',1,change_in_existing_df_col = 'Yes',new_col_to_change_name = 'Master Include',manual_edit_col_name = 'Manual Changes')`
|
|
129
|
-
|
|
130
|
-
### 28. `format_numbers_with_commas(df, decimal_length_chosen=2)`
|
|
131
|
-
- **Description**: Converts data in numerical format into numbers with commas and a chosen decimal place length.
|
|
132
|
-
- **Usage**: `format_numbers_with_commas(df,1)`
|
|
133
|
-
|
|
134
|
-
### 29. `filter_df_on_multiple_conditions(df, filters_dict)`
|
|
135
|
-
- **Description**: Filters dataframe on multiple conditions, which come in the form of a dictionary.
|
|
136
|
-
- **Usage**: `filter_df_on_multiple_conditions(df, {'OBS':' <= datetime(2023,1,23)','File_Name':' == 'France media''})`
|
|
137
|
-
|
|
138
|
-
### 30. `read_and_concatenate_files(folder_path, file_type='csv')`
|
|
139
|
-
- **Description**: Read and Concatinate all files of one type in a folder.
|
|
140
|
-
- **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
|
|
141
|
-
|
|
142
|
-
### 31. `remove_zero_values(data_frame, column_to_filter)`
|
|
143
|
-
- **Description**: Remove zero values in a specified column.
|
|
144
|
-
- **Usage**: `remove_zero_values(self, data_frame, column_to_filter)`
|
|
145
|
-
|
|
146
|
-
## Data Pulling
|
|
147
|
-
|
|
148
|
-
### 1. `pull_fred_data(data_frame, column_to_filter)`
|
|
149
|
-
- **Description**: Get data from FRED by using series id tokens.
|
|
150
|
-
- **Usage**: `pull_fred_data(week_commencing, series_id_list)`
|
|
151
|
-
|
|
152
|
-
### 2. `pull_boe_data(week_commencing)`
|
|
153
|
-
- **Description**: Fetch and process Bank of England interest rate data.
|
|
154
|
-
- **Usage**: ` pull_boe_data('mon')`
|
|
155
|
-
|
|
156
|
-
### 3. `pull_ons_data(series_list, week_commencing)`
|
|
157
|
-
- **Description**: Fetch and process time series data from the ONS API.
|
|
158
|
-
- **Usage**: `pull_ons_data(series_list, week_commencing)`
|
|
159
|
-
|
|
160
|
-
### 4. `pull_macro(country='GBR', week_commencing='mon')`
|
|
161
|
-
- **Description**: Fetch macroeconomic data from OECD and other sources for a specified country.
|
|
162
|
-
- **Usage**: `pull_macro(country='GBR', week_commencing='mon')`
|
|
163
|
-
|
|
164
|
-
### 5. `get_google_mobility_data(country, wc)`
|
|
165
|
-
- **Description**: Fetch Google Mobility data for the specified country.
|
|
166
|
-
- **Usage**: `get_google_mobility_data(country, wc)`
|
|
167
|
-
|
|
168
|
-
### 6. `pull_combined_dummies(week_commencing)`
|
|
169
|
-
- **Description**: Generate combined dummy variables for seasonality, trends, and COVID lockdowns.
|
|
170
|
-
- **Usage**: `pull_combined_dummies(week_commencing)`
|
|
171
|
-
|
|
172
|
-
### 7. `pull_weather(week_commencing, country)`
|
|
173
|
-
- **Description**: Fetch and process historical weather data for the specified country.
|
|
174
|
-
- **Usage**: `pull_weather(week_commencing, country)`
|
|
175
|
-
|
imsciences-0.6.0.3/README.md
DELETED
|
@@ -1,160 +0,0 @@
|
|
|
1
|
-
# IMS Package Documentation
|
|
2
|
-
|
|
3
|
-
The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
|
|
4
|
-
|
|
5
|
-
## Data Processing
|
|
6
|
-
|
|
7
|
-
### 1. `get_wd_levels(levels)`
|
|
8
|
-
- **Description**: Get the working directory with the option of moving up parents.
|
|
9
|
-
- **Usage**: `get_wd_levels(levels)`
|
|
10
|
-
|
|
11
|
-
### 2. `remove_rows(data_frame, num_rows_to_remove)`
|
|
12
|
-
- **Description**: Removes a specified number of rows from a pandas DataFrame.
|
|
13
|
-
- **Usage**: `remove_rows(data_frame, num_rows_to_remove)`
|
|
14
|
-
|
|
15
|
-
### 3. `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
16
|
-
- **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week. In the long format.
|
|
17
|
-
- **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
18
|
-
|
|
19
|
-
### 4. `convert_monthly_to_daily(df, date_column)`
|
|
20
|
-
- **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
|
|
21
|
-
- **Usage**: `convert_monthly_to_daily(df, date_column)`
|
|
22
|
-
|
|
23
|
-
### 5. `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
|
|
24
|
-
- **Description**: Plots specified columns from two different DataFrames using a shared date column. Useful for comparing data.
|
|
25
|
-
- **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
|
|
26
|
-
|
|
27
|
-
### 6. `remove_nan_rows(df, col_to_remove_rows)`
|
|
28
|
-
- **Description**: Removes rows from a DataFrame where the specified column has NaN values.
|
|
29
|
-
- **Usage**: `remove_nan_rows(df, col_to_remove_rows)`
|
|
30
|
-
|
|
31
|
-
### 7. `filter_rows(df, col_to_filter, list_of_filters)`
|
|
32
|
-
- **Description**: Filters the DataFrame based on whether the values in a specified column are in a provided list.
|
|
33
|
-
- **Usage**: `filter_rows(df, col_to_filter, list_of_filters)`
|
|
34
|
-
|
|
35
|
-
### 8. `plot_one(df1, col1, date_column)`
|
|
36
|
-
- **Description**: Plots a specified column from a DataFrame.
|
|
37
|
-
- **Usage**: `plot_one(df1, col1, date_column)`
|
|
38
|
-
|
|
39
|
-
### 9. `week_of_year_mapping(df, week_col, start_day_str)`
|
|
40
|
-
- **Description**: Converts a week column in 'yyyy-Www' or 'yyyy-ww' format to week commencing date.
|
|
41
|
-
- **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
|
|
42
|
-
|
|
43
|
-
### 10. `exclude_rows(df, col_to_filter, list_of_filters)`
|
|
44
|
-
- **Description**: Removes rows from a DataFrame based on whether the values in a specified column are not in a provided list.
|
|
45
|
-
- **Usage**: `exclude_rows(df, col_to_filter, list_of_filters)`
|
|
46
|
-
|
|
47
|
-
### 11. `rename_cols(df, cols_to_rename)`
|
|
48
|
-
- **Description**: Renames columns in a pandas DataFrame.
|
|
49
|
-
- **Usage**: `rename_cols(df, cols_to_rename)`
|
|
50
|
-
|
|
51
|
-
### 12. `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
|
|
52
|
-
- **Description**: Creates a new DataFrame with two columns: one for dates and one for merged numeric values.
|
|
53
|
-
- **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
|
|
54
|
-
|
|
55
|
-
### 13. `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
|
|
56
|
-
- **Description**: Merge a list of DataFrames on a common column.
|
|
57
|
-
- **Usage**: `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
|
|
58
|
-
|
|
59
|
-
### 14. `merge_and_update_dfs(df1, df2, key_column)`
|
|
60
|
-
- **Description**: Merges two dataframes on a key column, updates the first dataframe's columns with the second's where available, and returns a dataframe sorted by the key column.
|
|
61
|
-
- **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
|
|
62
|
-
|
|
63
|
-
### 15. `convert_us_to_uk_dates(df, date_col)`
|
|
64
|
-
- **Description**: Convert a DataFrame column with mixed date formats to datetime.
|
|
65
|
-
- **Usage**: `convert_us_to_uk_dates(df, date_col)`
|
|
66
|
-
|
|
67
|
-
### 16. `combine_sheets(all_sheets)`
|
|
68
|
-
- **Description**: Combines multiple DataFrames from a dictionary into a single DataFrame.
|
|
69
|
-
- **Usage**: `combine_sheets({'Sheet1': df1, 'Sheet2': df2})`
|
|
70
|
-
|
|
71
|
-
### 17. `pivot_table(df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)`
|
|
72
|
-
- **Description**: Dynamically pivots a DataFrame based on specified columns.
|
|
73
|
-
- **Usage**: `pivot_table(df, {'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, 'OBS', 'Channel Short Names', 'Value', fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)`
|
|
74
|
-
|
|
75
|
-
### 18. `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_country_dict='Other'), new_column_name='Mapping')`
|
|
76
|
-
- **Description**: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.
|
|
77
|
-
- **Usage**: `classify_within_column(df, ['campaign type','media type'], {'France Paid Social FB|paid social': 'facebook','France Paid Social TW|paid social': 'twitter'}, 'other','mapping')`
|
|
78
|
-
|
|
79
|
-
### 19. `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
80
|
-
- **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week. In the wide format.
|
|
81
|
-
- **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
82
|
-
|
|
83
|
-
### 20. `merge_cols_with_seperator(self, df, col_names,seperator='_',output_column_name = 'Merged',starting_prefix_str=None,ending_prefix_str=None)`
|
|
84
|
-
- **Description**: Merge multiple columns in a dataframe into 1 column with a seperator.Can be used if multiple columns are needed for a LUT.
|
|
85
|
-
- **Usage**: `merge_cols_with_seperator(df, ['Campaign','Product'],seperator='|','Merged Columns',starting_prefix_str='start_',ending_prefix_str='_end')`
|
|
86
|
-
|
|
87
|
-
### 21. `check_sum_of_df_cols_are_equal(df_1,df_2,cols_1,cols_2)`
|
|
88
|
-
- **Description**: Checks if the sum of two columns in two dataframes are the same, and provides the sums of each column and the difference between them.
|
|
89
|
-
- **Usage**: `check_sum_of_df_cols_are_equal(df_1,df_2,'Media Cost','Spend')`
|
|
90
|
-
|
|
91
|
-
### 22. `convert_2_df_cols_to_dict(df, key_col, value_col)`
|
|
92
|
-
- **Description**: Can be used to create an LUT. Creates a dictionary using two columns in a dataframe.
|
|
93
|
-
- **Usage**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`
|
|
94
|
-
|
|
95
|
-
### 23. `create_FY_and_H_columns(df, index_col, start_date, starting_FY,short_format='No',half_years='No',combined_FY_and_H='No')`
|
|
96
|
-
- **Description**: Used to create a financial year, half year, and financial half year column.
|
|
97
|
-
- **Usage**: `create_FY_and_H_columns(df, 'Week (M-S)', '2022-10-03', 'FY2023',short_format='Yes',half_years='Yes',combined_FY_and_H='Yes')`
|
|
98
|
-
|
|
99
|
-
### 24. `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict,output_column_name='Updated Column')`
|
|
100
|
-
- **Description**: Essentially provides an if statement with a xlookup if a value is something. Updates certain chosen values in a specified column of the DataFrame based on a lookup dictionary.
|
|
101
|
-
- **Usage**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel','segment','product'], qlik_dict_for_channel,output_column_name='Channel New')`
|
|
102
|
-
|
|
103
|
-
### 25. `create_new_version_of_col_using_LUT(df, keys_col,value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
|
|
104
|
-
- **Description**: Creates a new column in a dataframe, which takes an old column and uses a lookup table to changes values in the new column to reflect the lookup table. The lookup is based on a column in the dataframe.
|
|
105
|
-
- **Usage**: `keyword_lookup_replacement(df, '*Campaign Name','Campaign Type',search_campaign_name_retag_lut,'Campaign Name New')`
|
|
106
|
-
|
|
107
|
-
### 26. `convert_df_wide_2_long(df,value_cols,variable_col_name='Stacked',value_col_name='Value')`
|
|
108
|
-
- **Description**: Changes a dataframe from wide to long format.
|
|
109
|
-
- **Usage**: `keyword_lookup_replacement(df, ['Media Cost','Impressions','Clicks'],variable_col_name='Metric')`
|
|
110
|
-
|
|
111
|
-
### 27. `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
|
|
112
|
-
- **Description**: Allows the capability to manually update any cell in dataframe by applying filters and chosing a column to edit in dataframe.
|
|
113
|
-
- **Usage**: `keyword_lookup_replacement(df, {'OBS':' <= datetime(2023,1,23)','File_Name':' == 'France media''},'Master Include',1,change_in_existing_df_col = 'Yes',new_col_to_change_name = 'Master Include',manual_edit_col_name = 'Manual Changes')`
|
|
114
|
-
|
|
115
|
-
### 28. `format_numbers_with_commas(df, decimal_length_chosen=2)`
|
|
116
|
-
- **Description**: Converts data in numerical format into numbers with commas and a chosen decimal place length.
|
|
117
|
-
- **Usage**: `format_numbers_with_commas(df,1)`
|
|
118
|
-
|
|
119
|
-
### 29. `filter_df_on_multiple_conditions(df, filters_dict)`
|
|
120
|
-
- **Description**: Filters dataframe on multiple conditions, which come in the form of a dictionary.
|
|
121
|
-
- **Usage**: `filter_df_on_multiple_conditions(df, {'OBS':' <= datetime(2023,1,23)','File_Name':' == 'France media''})`
|
|
122
|
-
|
|
123
|
-
### 30. `read_and_concatenate_files(folder_path, file_type='csv')`
|
|
124
|
-
- **Description**: Read and Concatinate all files of one type in a folder.
|
|
125
|
-
- **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
|
|
126
|
-
|
|
127
|
-
### 31. `remove_zero_values(data_frame, column_to_filter)`
|
|
128
|
-
- **Description**: Remove zero values in a specified column.
|
|
129
|
-
- **Usage**: `remove_zero_values(self, data_frame, column_to_filter)`
|
|
130
|
-
|
|
131
|
-
## Data Pulling
|
|
132
|
-
|
|
133
|
-
### 1. `pull_fred_data(data_frame, column_to_filter)`
|
|
134
|
-
- **Description**: Get data from FRED by using series id tokens.
|
|
135
|
-
- **Usage**: `pull_fred_data(week_commencing, series_id_list)`
|
|
136
|
-
|
|
137
|
-
### 2. `pull_boe_data(week_commencing)`
|
|
138
|
-
- **Description**: Fetch and process Bank of England interest rate data.
|
|
139
|
-
- **Usage**: ` pull_boe_data('mon')`
|
|
140
|
-
|
|
141
|
-
### 3. `pull_ons_data(series_list, week_commencing)`
|
|
142
|
-
- **Description**: Fetch and process time series data from the ONS API.
|
|
143
|
-
- **Usage**: `pull_ons_data(series_list, week_commencing)`
|
|
144
|
-
|
|
145
|
-
### 4. `pull_macro(country='GBR', week_commencing='mon')`
|
|
146
|
-
- **Description**: Fetch macroeconomic data from OECD and other sources for a specified country.
|
|
147
|
-
- **Usage**: `pull_macro(country='GBR', week_commencing='mon')`
|
|
148
|
-
|
|
149
|
-
### 5. `get_google_mobility_data(country, wc)`
|
|
150
|
-
- **Description**: Fetch Google Mobility data for the specified country.
|
|
151
|
-
- **Usage**: `get_google_mobility_data(country, wc)`
|
|
152
|
-
|
|
153
|
-
### 6. `pull_combined_dummies(week_commencing)`
|
|
154
|
-
- **Description**: Generate combined dummy variables for seasonality, trends, and COVID lockdowns.
|
|
155
|
-
- **Usage**: `pull_combined_dummies(week_commencing)`
|
|
156
|
-
|
|
157
|
-
### 7. `pull_weather(week_commencing, country)`
|
|
158
|
-
- **Description**: Fetch and process historical weather data for the specified country.
|
|
159
|
-
- **Usage**: `pull_weather(week_commencing, country)`
|
|
160
|
-
|
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: imsciences
|
|
3
|
-
Version: 0.6.0.3
|
|
4
|
-
Summary: IMS Data Processing Package
|
|
5
|
-
Author: IMS
|
|
6
|
-
Author-email: cam@im-sciences.com
|
|
7
|
-
Keywords: python,data processing
|
|
8
|
-
Classifier: Development Status :: 3 - Alpha
|
|
9
|
-
Classifier: Intended Audience :: Developers
|
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Operating System :: Unix
|
|
12
|
-
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
|
-
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
-
Description-Content-Type: text/markdown
|
|
15
|
-
|
|
16
|
-
# IMS Package Documentation
|
|
17
|
-
|
|
18
|
-
The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
|
|
19
|
-
|
|
20
|
-
## Data Processing
|
|
21
|
-
|
|
22
|
-
### 1. `get_wd_levels(levels)`
|
|
23
|
-
- **Description**: Get the working directory with the option of moving up parents.
|
|
24
|
-
- **Usage**: `get_wd_levels(levels)`
|
|
25
|
-
|
|
26
|
-
### 2. `remove_rows(data_frame, num_rows_to_remove)`
|
|
27
|
-
- **Description**: Removes a specified number of rows from a pandas DataFrame.
|
|
28
|
-
- **Usage**: `remove_rows(data_frame, num_rows_to_remove)`
|
|
29
|
-
|
|
30
|
-
### 3. `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
31
|
-
- **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week. In the long format.
|
|
32
|
-
- **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
33
|
-
|
|
34
|
-
### 4. `convert_monthly_to_daily(df, date_column)`
|
|
35
|
-
- **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
|
|
36
|
-
- **Usage**: `convert_monthly_to_daily(df, date_column)`
|
|
37
|
-
|
|
38
|
-
### 5. `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
|
|
39
|
-
- **Description**: Plots specified columns from two different DataFrames using a shared date column. Useful for comparing data.
|
|
40
|
-
- **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
|
|
41
|
-
|
|
42
|
-
### 6. `remove_nan_rows(df, col_to_remove_rows)`
|
|
43
|
-
- **Description**: Removes rows from a DataFrame where the specified column has NaN values.
|
|
44
|
-
- **Usage**: `remove_nan_rows(df, col_to_remove_rows)`
|
|
45
|
-
|
|
46
|
-
### 7. `filter_rows(df, col_to_filter, list_of_filters)`
|
|
47
|
-
- **Description**: Filters the DataFrame based on whether the values in a specified column are in a provided list.
|
|
48
|
-
- **Usage**: `filter_rows(df, col_to_filter, list_of_filters)`
|
|
49
|
-
|
|
50
|
-
### 8. `plot_one(df1, col1, date_column)`
|
|
51
|
-
- **Description**: Plots a specified column from a DataFrame.
|
|
52
|
-
- **Usage**: `plot_one(df1, col1, date_column)`
|
|
53
|
-
|
|
54
|
-
### 9. `week_of_year_mapping(df, week_col, start_day_str)`
|
|
55
|
-
- **Description**: Converts a week column in 'yyyy-Www' or 'yyyy-ww' format to week commencing date.
|
|
56
|
-
- **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
|
|
57
|
-
|
|
58
|
-
### 10. `exclude_rows(df, col_to_filter, list_of_filters)`
|
|
59
|
-
- **Description**: Removes rows from a DataFrame based on whether the values in a specified column are not in a provided list.
|
|
60
|
-
- **Usage**: `exclude_rows(df, col_to_filter, list_of_filters)`
|
|
61
|
-
|
|
62
|
-
### 11. `rename_cols(df, cols_to_rename)`
|
|
63
|
-
- **Description**: Renames columns in a pandas DataFrame.
|
|
64
|
-
- **Usage**: `rename_cols(df, cols_to_rename)`
|
|
65
|
-
|
|
66
|
-
### 12. `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
|
|
67
|
-
- **Description**: Creates a new DataFrame with two columns: one for dates and one for merged numeric values.
|
|
68
|
-
- **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
|
|
69
|
-
|
|
70
|
-
### 13. `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
|
|
71
|
-
- **Description**: Merge a list of DataFrames on a common column.
|
|
72
|
-
- **Usage**: `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
|
|
73
|
-
|
|
74
|
-
### 14. `merge_and_update_dfs(df1, df2, key_column)`
|
|
75
|
-
- **Description**: Merges two dataframes on a key column, updates the first dataframe's columns with the second's where available, and returns a dataframe sorted by the key column.
|
|
76
|
-
- **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
|
|
77
|
-
|
|
78
|
-
### 15. `convert_us_to_uk_dates(df, date_col)`
|
|
79
|
-
- **Description**: Convert a DataFrame column with mixed date formats to datetime.
|
|
80
|
-
- **Usage**: `convert_us_to_uk_dates(df, date_col)`
|
|
81
|
-
|
|
82
|
-
### 16. `combine_sheets(all_sheets)`
|
|
83
|
-
- **Description**: Combines multiple DataFrames from a dictionary into a single DataFrame.
|
|
84
|
-
- **Usage**: `combine_sheets({'Sheet1': df1, 'Sheet2': df2})`
|
|
85
|
-
|
|
86
|
-
### 17. `pivot_table(df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)`
|
|
87
|
-
- **Description**: Dynamically pivots a DataFrame based on specified columns.
|
|
88
|
-
- **Usage**: `pivot_table(df, {'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, 'OBS', 'Channel Short Names', 'Value', fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)`
|
|
89
|
-
|
|
90
|
-
### 18. `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_country_dict='Other', new_column_name='Mapping')`
|
|
91
|
-
- **Description**: Equivalent of XLOOKUP in Excel. Allows you to map a dictionary of substrings within a column. If multiple columns are needed for the LUT then a | separator is needed.
|
|
92
|
-
- **Usage**: `classify_within_column(df, ['campaign type','media type'], {'France Paid Social FB|paid social': 'facebook','France Paid Social TW|paid social': 'twitter'}, 'other','mapping')`
|
|
93
|
-
|
|
94
|
-
### 19. `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
95
|
-
- **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week. In the wide format.
|
|
96
|
-
- **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
|
|
97
|
-
|
|
98
|
-
### 20. `merge_cols_with_seperator(self, df, col_names,seperator='_',output_column_name = 'Merged',starting_prefix_str=None,ending_prefix_str=None)`
|
|
99
|
-
- **Description**: Merge multiple columns in a dataframe into 1 column with a separator. Can be used if multiple columns are needed for a LUT.
|
|
100
|
-
- **Usage**: `merge_cols_with_seperator(df, ['Campaign','Product'],seperator='|','Merged Columns',starting_prefix_str='start_',ending_prefix_str='_end')`
|
|
101
|
-
|
|
102
|
-
### 21. `check_sum_of_df_cols_are_equal(df_1,df_2,cols_1,cols_2)`
|
|
103
|
-
- **Description**: Checks if the sum of two columns in two dataframes are the same, and provides the sums of each column and the difference between them.
|
|
104
|
-
- **Usage**: `check_sum_of_df_cols_are_equal(df_1,df_2,'Media Cost','Spend')`
|
|
105
|
-
|
|
106
|
-
### 22. `convert_2_df_cols_to_dict(df, key_col, value_col)`
|
|
107
|
-
- **Description**: Can be used to create an LUT. Creates a dictionary using two columns in a dataframe.
|
|
108
|
-
- **Usage**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`
|
|
109
|
-
|
|
110
|
-
### 23. `create_FY_and_H_columns(df, index_col, start_date, starting_FY,short_format='No',half_years='No',combined_FY_and_H='No')`
|
|
111
|
-
- **Description**: Used to create a financial year, half year, and financial half year column.
|
|
112
|
-
- **Usage**: `create_FY_and_H_columns(df, 'Week (M-S)', '2022-10-03', 'FY2023',short_format='Yes',half_years='Yes',combined_FY_and_H='Yes')`
|
|
113
|
-
|
|
114
|
-
### 24. `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict,output_column_name='Updated Column')`
|
|
115
|
-
- **Description**: Essentially provides an if statement with a xlookup if a value is something. Updates certain chosen values in a specified column of the DataFrame based on a lookup dictionary.
|
|
116
|
-
- **Usage**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel','segment','product'], qlik_dict_for_channel,output_column_name='Channel New')`
|
|
117
|
-
|
|
118
|
-
### 25. `create_new_version_of_col_using_LUT(df, keys_col,value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
|
|
119
|
-
- **Description**: Creates a new column in a dataframe, which takes an old column and uses a lookup table to changes values in the new column to reflect the lookup table. The lookup is based on a column in the dataframe.
|
|
120
|
-
- **Usage**: `create_new_version_of_col_using_LUT(df, '*Campaign Name','Campaign Type',search_campaign_name_retag_lut,'Campaign Name New')`
|
|
121
|
-
|
|
122
|
-
### 26. `convert_df_wide_2_long(df,value_cols,variable_col_name='Stacked',value_col_name='Value')`
|
|
123
|
-
- **Description**: Changes a dataframe from wide to long format.
|
|
124
|
-
- **Usage**: `convert_df_wide_2_long(df, ['Media Cost','Impressions','Clicks'],variable_col_name='Metric')`
|
|
125
|
-
|
|
126
|
-
### 27. `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
|
|
127
|
-
- **Description**: Allows the capability to manually update any cell in a dataframe by applying filters and choosing a column to edit in the dataframe.
|
|
128
|
-
- **Usage**: `manually_edit_data(df, {'OBS':' <= datetime(2023,1,23)','File_Name':' == 'France media''},'Master Include',1,change_in_existing_df_col = 'Yes',new_col_to_change_name = 'Master Include',manual_edit_col_name = 'Manual Changes')`
|
|
129
|
-
|
|
130
|
-
### 28. `format_numbers_with_commas(df, decimal_length_chosen=2)`
|
|
131
|
-
- **Description**: Converts data in numerical format into numbers with commas and a chosen decimal place length.
|
|
132
|
-
- **Usage**: `format_numbers_with_commas(df,1)`
|
|
133
|
-
|
|
134
|
-
### 29. `filter_df_on_multiple_conditions(df, filters_dict)`
|
|
135
|
-
- **Description**: Filters dataframe on multiple conditions, which come in the form of a dictionary.
|
|
136
|
-
- **Usage**: `filter_df_on_multiple_conditions(df, {'OBS':' <= datetime(2023,1,23)','File_Name':' == 'France media''})`
|
|
137
|
-
|
|
138
|
-
### 30. `read_and_concatenate_files(folder_path, file_type='csv')`
|
|
139
|
-
- **Description**: Read and concatenate all files of one type in a folder.
|
|
140
|
-
- **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
|
|
141
|
-
|
|
142
|
-
### 31. `remove_zero_values(data_frame, column_to_filter)`
|
|
143
|
-
- **Description**: Remove zero values in a specified column.
|
|
144
|
-
- **Usage**: `remove_zero_values(data_frame, column_to_filter)`
|
|
145
|
-
|
|
146
|
-
## Data Pulling
|
|
147
|
-
|
|
148
|
-
### 1. `pull_fred_data(week_commencing, series_id_list)`
|
|
149
|
-
- **Description**: Get data from FRED by using series id tokens.
|
|
150
|
-
- **Usage**: `pull_fred_data(week_commencing, series_id_list)`
|
|
151
|
-
|
|
152
|
-
### 2. `pull_boe_data(week_commencing)`
|
|
153
|
-
- **Description**: Fetch and process Bank of England interest rate data.
|
|
154
|
-
- **Usage**: `pull_boe_data('mon')`
|
|
155
|
-
|
|
156
|
-
### 3. `pull_ons_data(series_list, week_commencing)`
|
|
157
|
-
- **Description**: Fetch and process time series data from the ONS API.
|
|
158
|
-
- **Usage**: `pull_ons_data(series_list, week_commencing)`
|
|
159
|
-
|
|
160
|
-
### 4. `pull_macro(country='GBR', week_commencing='mon')`
|
|
161
|
-
- **Description**: Fetch macroeconomic data from OECD and other sources for a specified country.
|
|
162
|
-
- **Usage**: `pull_macro(country='GBR', week_commencing='mon')`
|
|
163
|
-
|
|
164
|
-
### 5. `get_google_mobility_data(country, wc)`
|
|
165
|
-
- **Description**: Fetch Google Mobility data for the specified country.
|
|
166
|
-
- **Usage**: `get_google_mobility_data(country, wc)`
|
|
167
|
-
|
|
168
|
-
### 6. `pull_combined_dummies(week_commencing)`
|
|
169
|
-
- **Description**: Generate combined dummy variables for seasonality, trends, and COVID lockdowns.
|
|
170
|
-
- **Usage**: `pull_combined_dummies(week_commencing)`
|
|
171
|
-
|
|
172
|
-
### 7. `pull_weather(week_commencing, country)`
|
|
173
|
-
- **Description**: Fetch and process historical weather data for the specified country.
|
|
174
|
-
- **Usage**: `pull_weather(week_commencing, country)`
|
|
175
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|