imsciences 0.8__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2127,14 +2127,27 @@ class datapull:
2127
2127
  day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
2128
2128
 
2129
2129
  # Create daily date range dataframe starting from start_date
2130
- date_range = pd.date_range(start=pd.to_datetime(start_date), end=datetime.today(), freq="d")
2130
+ date_range = pd.date_range(
2131
+ start=pd.to_datetime(start_date),
2132
+ end=datetime.today(),
2133
+ freq="D"
2134
+ )
2131
2135
  df_daily = pd.DataFrame(date_range, columns=["Date"])
2132
-
2133
- # Create weekly date range dataframe
2134
- df_daily['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2136
+
2137
+ # ------------------------------------------------
2138
+ # 1. Identify "week_start" for each daily row
2139
+ # ------------------------------------------------
2140
+ df_daily['week_start'] = df_daily["Date"].apply(
2141
+ lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
2142
+ )
2143
+
2144
+ # ------------------------------------------------
2145
+ # 2. Build a weekly index (df_weekly_start) with dummy columns
2146
+ # ------------------------------------------------
2135
2147
  df_weekly_start = df_daily[['week_start']].drop_duplicates().reset_index(drop=True)
2136
2148
  df_weekly_start.rename(columns={'week_start': "Date"}, inplace=True)
2137
2149
 
2150
+ # Set index to weekly "start of week"
2138
2151
  df_weekly_start.index = np.arange(1, len(df_weekly_start) + 1)
2139
2152
  df_weekly_start.set_index("Date", inplace=True)
2140
2153
 
@@ -2144,76 +2157,122 @@ class datapull:
2144
2157
  col_name = f"dum_{df_weekly_start.index[i].strftime('%Y_%m_%d')}"
2145
2158
  dummy_columns[col_name] = [0] * len(df_weekly_start)
2146
2159
  dummy_columns[col_name][i] = 1
2147
-
2160
+
2148
2161
  df_dummies = pd.DataFrame(dummy_columns, index=df_weekly_start.index)
2149
2162
  df_weekly_start = pd.concat([df_weekly_start, df_dummies], axis=1)
2150
-
2151
- # Add public holidays for each country and holiday type
2163
+
2164
+ # ------------------------------------------------
2165
+ # 3. Public holidays (daily) and specific holiday columns
2166
+ # ------------------------------------------------
2152
2167
  for country in countries:
2153
- country_holidays = holidays.CountryHoliday(country, years=range(int(start_date[:4]), datetime.today().year + 1))
2154
- df_daily[f"seas_holiday_{country.lower()}"] = df_daily["Date"].apply(lambda x: 1 if x in country_holidays else 0)
2155
-
2156
- # Extract specific holidays
2157
- for date, name in country_holidays.items():
2168
+ country_holidays = holidays.CountryHoliday(
2169
+ country,
2170
+ years=range(int(start_date[:4]), datetime.today().year + 1)
2171
+ )
2172
+ # Daily indicator: 1 if that date is a holiday
2173
+ df_daily[f"seas_holiday_{country.lower()}"] = df_daily["Date"].apply(
2174
+ lambda x: 1 if x in country_holidays else 0
2175
+ )
2176
+ # Create columns for specific holiday names
2177
+ for date_hol, name in country_holidays.items():
2158
2178
  col_name = f"seas_{name.replace(' ', '_').lower()}_{country.lower()}"
2159
2179
  if col_name not in df_daily.columns:
2160
2180
  df_daily[col_name] = 0
2161
- df_daily.loc[df_daily["Date"] == pd.Timestamp(date), col_name] = 1
2181
+ df_daily.loc[df_daily["Date"] == pd.Timestamp(date_hol), col_name] = 1
2182
+
2183
+ # ------------------------------------------------
2184
+ # 4. Add daily indicators for last day & last Friday of month
2185
+ # Then aggregate them to weekly level using .max()
2186
+ # ------------------------------------------------
2187
+ # Last day of month (daily)
2188
+ df_daily["seas_last_day_of_month"] = df_daily["Date"].apply(
2189
+ lambda d: 1 if d == d.to_period("M").to_timestamp("M") else 0
2190
+ )
2162
2191
 
2163
- # Map daily holidays to weekly aggregation
2164
- df_daily['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2165
- df_holidays = df_daily.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})
2166
- df_holidays.set_index("Date", inplace=True)
2192
+ # Last Friday of month (daily)
2193
+ def is_last_friday(date):
2194
+ # last day of the month
2195
+ last_day_of_month = date.to_period("M").to_timestamp("M")
2196
+ last_day_weekday = last_day_of_month.dayofweek
2197
+ # Determine how many days we go back from the last day to get Friday
2198
+ if last_day_weekday >= 4:
2199
+ days_to_subtract = last_day_weekday - 4
2200
+ else:
2201
+ days_to_subtract = last_day_weekday + 3
2202
+ last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
2203
+ return 1 if date == last_friday else 0
2167
2204
 
2168
- # Create monthly dummies (separately from holidays)
2169
- df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
2170
- df_monthly_dummies = pd.get_dummies(df_daily, prefix="seas", columns=["Month"], dtype=int)
2171
- df_monthly_dummies['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2172
- df_monthly_dummies = df_monthly_dummies.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})
2173
- df_monthly_dummies.set_index("Date", inplace=True)
2205
+ df_daily["seas_last_friday_of_month"] = df_daily["Date"].apply(is_last_friday)
2174
2206
 
2175
- # Divide only the monthly dummy columns by 7 (exclude holiday-related columns)
2176
- monthly_cols = [col for col in df_monthly_dummies.columns if not col.startswith("seas_holiday") and not col.startswith("seas_")]
2207
+ # ------------------------------------------------
2208
+ # 5. Weekly aggregation for HOLIDAYS & monthly dummies
2209
+ # (Using .max() for holiday indicators so they become binary)
2210
+ # ------------------------------------------------
2211
+ # For monthly dummies, create a daily col "Month", then get_dummies
2212
+ df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
2213
+ df_monthly_dummies = pd.get_dummies(
2214
+ df_daily,
2215
+ prefix="seas",
2216
+ columns=["Month"],
2217
+ dtype=int
2218
+ )
2219
+ # Recalculate 'week_start' (already in df_daily, but just to be sure)
2220
+ df_monthly_dummies['week_start'] = df_daily['week_start']
2221
+
2222
+ # Group monthly dummies by .sum() or .mean()—often we average across the week
2223
+ df_monthly_dummies = (
2224
+ df_monthly_dummies
2225
+ .groupby('week_start')
2226
+ .sum(numeric_only=True) # sum the daily flags
2227
+ .reset_index()
2228
+ .rename(columns={'week_start': "Date"})
2229
+ .set_index("Date")
2230
+ )
2231
+ # Divide the monthly dummy columns by 7 to spread them across the week
2232
+ monthly_cols = [
2233
+ c for c in df_monthly_dummies.columns
2234
+ if c.startswith("seas_month_")
2235
+ ]
2177
2236
  df_monthly_dummies[monthly_cols] = df_monthly_dummies[monthly_cols] / 7
2178
-
2179
- # Merge weekly dummies, monthly dummies, and holidays
2180
- df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1) # Combine weekly and monthly first
2181
- df_combined = pd.concat([df_combined, df_holidays], axis=1) # Add holidays separately
2182
-
2183
- # Drop duplicate columns if any exist (this ensures holidays are not duplicated)
2237
+
2238
+ # Group holiday columns (and last-day-of-month columns) by .max() => binary
2239
+ df_holidays = (
2240
+ df_daily
2241
+ .groupby('week_start')
2242
+ .max(numeric_only=True) # use max => if any day=1, entire week=1
2243
+ .reset_index()
2244
+ .rename(columns={'week_start': "Date"})
2245
+ .set_index("Date")
2246
+ )
2247
+
2248
+ # ------------------------------------------------
2249
+ # 6. Combine weekly start, monthly dummies, holiday flags
2250
+ # ------------------------------------------------
2251
+ df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1)
2252
+ df_combined = pd.concat([df_combined, df_holidays], axis=1)
2184
2253
  df_combined = df_combined.loc[:, ~df_combined.columns.duplicated()]
2185
2254
 
2186
- # Create weekly dummies
2255
+ # ------------------------------------------------
2256
+ # 7. Create weekly dummies for Week of Year & yearly dummies
2257
+ # ------------------------------------------------
2187
2258
  df_combined.reset_index(inplace=True)
2259
+ df_combined.rename(columns={"index": "old_index"}, inplace=True) # just in case
2260
+
2188
2261
  df_combined["Week"] = df_combined["Date"].dt.isocalendar().week
2189
2262
  df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Week"], dtype=int)
2190
2263
 
2191
- # Create yearly dummies
2192
2264
  df_combined["Year"] = df_combined["Date"].dt.year
2193
2265
  df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Year"], dtype=int)
2194
2266
 
2195
- # Add constant
2267
+ # ------------------------------------------------
2268
+ # 8. Add constant & trend
2269
+ # ------------------------------------------------
2196
2270
  df_combined["Constant"] = 1
2197
-
2198
- # Add trend
2199
2271
  df_combined["Trend"] = df_combined.index + 1
2200
-
2201
- # Create seasonal indicators for the last day and last Friday of the month
2202
- df_combined['seas_last_day_of_month'] = df_combined["Date"].apply(lambda x: 1 if x == x.to_period('M').to_timestamp('M') else 0)
2203
-
2204
- def is_last_friday(date):
2205
- last_day_of_month = date.to_period('M').to_timestamp('M')
2206
- last_day_weekday = last_day_of_month.dayofweek
2207
- if last_day_weekday >= 4:
2208
- days_to_subtract = last_day_weekday - 4
2209
- else:
2210
- days_to_subtract = last_day_weekday + 3
2211
- last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
2212
- return 1 if date == last_friday else 0
2213
-
2214
- df_combined['seas_last_friday_of_month'] = df_combined["Date"].apply(is_last_friday)
2215
2272
 
2216
- # Rename Date to OBS
2273
+ # ------------------------------------------------
2274
+ # 9. Rename Date -> OBS and return
2275
+ # ------------------------------------------------
2217
2276
  df_combined.rename(columns={"Date": "OBS"}, inplace=True)
2218
2277
 
2219
2278
  return df_combined
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.8
3
+ Version: 0.8.1
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -35,97 +35,97 @@ The **IMSciences package** is a Python library designed to process incoming data
35
35
 
36
36
  ---
37
37
 
38
- ## Table of Contents
38
+ Table of Contents
39
+ =================
39
40
 
40
- 1. [Data Processing](#data-processing)
41
- 2. [Data Pulling](#data-pulling)
42
- 3. [Installation](#installation)
43
- 4. [Useage](#useage)
44
- 5. [License](#license)
41
+ 1. `Data Processing <#data-processing>`_
42
+ 2. `Data Pulling <#data-pulling>`_
43
+ 3. `Installation <#installation>`_
44
+ 4. `Usage <#usage>`_
45
+ 5. `License <#license>`_
45
46
 
46
47
  ---
47
48
 
48
49
  ## Data Processing
49
50
 
50
-
51
- ## 1. `get_wd_levels`
51
+ ## 1. get_wd_levels
52
52
  - **Description**: Get the working directory with the option of moving up parents.
53
53
  - **Usage**: `get_wd_levels(levels)`
54
54
  - **Example**: `get_wd_levels(0)`
55
55
 
56
56
  ---
57
57
 
58
- ## 2. `remove_rows`
58
+ ## 2. remove_rows
59
59
  - **Description**: Removes a specified number of rows from a pandas DataFrame.
60
60
  - **Usage**: `remove_rows(data_frame, num_rows_to_remove)`
61
61
  - **Example**: `remove_rows(df, 2)`
62
62
 
63
63
  ---
64
64
 
65
- ## 3. `aggregate_daily_to_wc_long`
65
+ ## 3. aggregate_daily_to_wc_long
66
66
  - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
67
67
  - **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum')`
68
68
  - **Example**: `aggregate_daily_to_wc_long(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average')`
69
69
 
70
70
  ---
71
71
 
72
- ## 4. `convert_monthly_to_daily`
72
+ ## 4. convert_monthly_to_daily
73
73
  - **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
74
74
  - **Usage**: `convert_monthly_to_daily(df, date_column, divide)`
75
75
  - **Example**: `convert_monthly_to_daily(df, 'date')`
76
76
 
77
77
  ---
78
78
 
79
- ## 5. `plot_two`
79
+ ## 5. plot_two
80
80
  - **Description**: Plots specified columns from two different DataFrames using a shared date column. Useful for comparing data.
81
81
  - **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
82
82
  - **Example**: `plot_two(df1, 'cost', df2, 'cost', 'obs', True)`
83
83
 
84
84
  ---
85
85
 
86
- ## 6. `remove_nan_rows`
86
+ ## 6. remove_nan_rows
87
87
  - **Description**: Removes rows from a DataFrame where the specified column has NaN values.
88
88
  - **Usage**: `remove_nan_rows(df, col_to_remove_rows)`
89
89
  - **Example**: `remove_nan_rows(df, 'date')`
90
90
 
91
91
  ---
92
92
 
93
- ## 7. `filter_rows`
93
+ ## 7. filter_rows
94
94
  - **Description**: Filters the DataFrame based on whether the values in a specified column are in a provided list.
95
95
  - **Usage**: `filter_rows(df, col_to_filter, list_of_filters)`
96
96
  - **Example**: `filter_rows(df, 'country', ['UK', 'IE'])`
97
97
 
98
98
  ---
99
99
 
100
- ## 8. `plot_one`
100
+ ## 8. plot_one
101
101
  - **Description**: Plots a specified column from a DataFrame.
102
102
  - **Usage**: `plot_one(df1, col1, date_column)`
103
103
  - **Example**: `plot_one(df, 'Spend', 'OBS')`
104
104
 
105
105
  ---
106
106
 
107
- ## 9. `week_of_year_mapping`
107
+ ## 9. week_of_year_mapping
108
108
  - **Description**: Converts a week column in `yyyy-Www` or `yyyy-ww` format to week commencing date.
109
109
  - **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
110
110
  - **Example**: `week_of_year_mapping(df, 'week', 'mon')`
111
111
 
112
112
  ---
113
113
 
114
- ## 10. `exclude_rows`
114
+ ## 10. exclude_rows
115
115
  - **Description**: Removes rows from a DataFrame based on whether the values in a specified column are not in a provided list.
116
116
  - **Usage**: `exclude_rows(df, col_to_filter, list_of_filters)`
117
117
  - **Example**: `exclude_rows(df, 'week', ['2022-W20', '2022-W21'])`
118
118
 
119
119
  ---
120
120
 
121
- ## 11. `rename_cols`
121
+ ## 11. rename_cols
122
122
  - **Description**: Renames columns in a pandas DataFrame.
123
123
  - **Usage**: `rename_cols(df, name)`
124
124
  - **Example**: `rename_cols(df, 'ame_facebook')`
125
125
 
126
126
  ---
127
127
 
128
- ## 12. `merge_new_and_old`
128
+ ## 12. merge_new_and_old
129
129
  - **Description**: Creates a new DataFrame with two columns: one for dates and one for merged numeric values.
130
130
  - Merges numeric values from specified columns in the old and new DataFrames based on a given cutoff date.
131
131
  - **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
@@ -133,21 +133,21 @@ The **IMSciences package** is a Python library designed to process incoming data
133
133
 
134
134
  ---
135
135
 
136
- ## 13. `merge_dataframes_on_date`
136
+ ## 13. merge_dataframes_on_date
137
137
  - **Description**: Merge a list of DataFrames on a common column.
138
138
  - **Usage**: `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
139
139
  - **Example**: `merge_dataframes_on_date([df1, df2, df3], common_column='OBS', merge_how='outer')`
140
140
 
141
141
  ---
142
142
 
143
- ## 14. `merge_and_update_dfs`
143
+ ## 14. merge_and_update_dfs
144
144
  - **Description**: Merges two dataframes on a key column, updates the first dataframe's columns with the second's where available, and returns a dataframe sorted by the key column.
145
145
  - **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
146
146
  - **Example**: `merge_and_update_dfs(processed_facebook, finalised_meta, 'OBS')`
147
147
 
148
148
  ---
149
149
 
150
- ## 15. `convert_us_to_uk_dates`
150
+ ## 15. convert_us_to_uk_dates
151
151
  - **Description**: Convert a DataFrame column with mixed date formats to datetime.
152
152
  - **Usage**: `convert_us_to_uk_dates(df, date_col)`
153
153
  - **Example**: `convert_us_to_uk_dates(df, 'date')`
@@ -161,189 +161,189 @@ The **IMSciences package** is a Python library designed to process incoming data
161
161
 
162
162
  ---
163
163
 
164
- ## 17. `pivot_table`
164
+ ## 17. pivot_table
165
165
  - **Description**: Dynamically pivots a DataFrame based on specified columns.
166
166
  - **Usage**: `pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=False, fill_missing_weekly_dates=False, week_commencing='W-MON')`
167
167
  - **Example**: `pivot_table(df, 'OBS', 'Channel Short Names', 'Value', filters_dict={'Master Include': ' == 1', 'OBS': ' >= datetime(2019,9,9)', 'Metric Short Names': ' == spd'}, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=True, fill_missing_weekly_dates=True, week_commencing='W-MON')`
168
168
 
169
169
  ---
170
170
 
171
- ## 18. `apply_lookup_table_for_columns`
171
+ ## 18. apply_lookup_table_for_columns
172
172
  - **Description**: Equivalent of XLOOKUP in Excel. Allows mapping of a dictionary of substrings within a column.
173
173
  - **Usage**: `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')`
174
174
  - **Example**: `apply_lookup_table_for_columns(df, col_names, {'spend': 'spd', 'clicks': 'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')`
175
175
 
176
176
  ---
177
177
 
178
- ## 19. `aggregate_daily_to_wc_wide`
178
+ ## 19. aggregate_daily_to_wc_wide
179
179
  - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
180
180
  - **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
181
181
  - **Example**: `aggregate_daily_to_wc_wide(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average', True)`
182
182
 
183
183
  ---
184
184
 
185
- ## 20. `merge_cols_with_seperator`
185
+ ## 20. merge_cols_with_seperator
186
186
  - **Description**: Merges multiple columns in a DataFrame into one column with a separator `_`. Useful for lookup tables.
187
187
  - **Usage**: `merge_cols_with_seperator(df, col_names, seperator='_', output_column_name='Merged', starting_prefix_str=None, ending_prefix_str=None)`
188
188
  - **Example**: `merge_cols_with_seperator(df, ['Campaign', 'Product'], seperator='|', output_column_name='Merged Columns', starting_prefix_str='start_', ending_prefix_str='_end')`
189
189
 
190
190
  ---
191
191
 
192
- ## 21. `check_sum_of_df_cols_are_equal`
192
+ ## 21. check_sum_of_df_cols_are_equal
193
193
  - **Description**: Checks if the sum of two columns in two DataFrames are the same, and provides the sums and differences.
194
194
  - **Usage**: `check_sum_of_df_cols_are_equal(df_1, df_2, cols_1, cols_2)`
195
195
  - **Example**: `check_sum_of_df_cols_are_equal(df_1, df_2, 'Media Cost', 'Spend')`
196
196
 
197
197
  ---
198
198
 
199
- ## 22. `convert_2_df_cols_to_dict`
199
+ ## 22. convert_2_df_cols_to_dict
200
200
  - **Description**: Creates a dictionary using two columns in a DataFrame.
201
201
  - **Usage**: `convert_2_df_cols_to_dict(df, key_col, value_col)`
202
202
  - **Example**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`
203
203
 
204
204
  ---
205
205
 
206
- ## 23. `create_FY_and_H_columns`
206
+ ## 23. create_FY_and_H_columns
207
207
  - **Description**: Creates financial year, half-year, and financial half-year columns.
208
208
  - **Usage**: `create_FY_and_H_columns(df, index_col, start_date, starting_FY, short_format='No', half_years='No', combined_FY_and_H='No')`
209
209
  - **Example**: `create_FY_and_H_columns(df, 'Week (M-S)', '2022-10-03', 'FY2023', short_format='Yes', half_years='Yes', combined_FY_and_H='Yes')`
210
210
 
211
211
  ---
212
212
 
213
- ## 24. `keyword_lookup_replacement`
213
+ ## 24. keyword_lookup_replacement
214
214
  - **Description**: Updates chosen values in a specified column of the DataFrame based on a lookup dictionary.
215
215
  - **Usage**: `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict, output_column_name='Updated Column')`
216
216
  - **Example**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel', 'segment', 'product'], qlik_dict_for_channel, output_column_name='Channel New')`
217
217
 
218
218
  ---
219
219
 
220
- ## 25. `create_new_version_of_col_using_LUT`
220
+ ## 25. create_new_version_of_col_using_LUT
221
221
  - **Description**: Creates a new column in a DataFrame by mapping values from an old column using a lookup table.
222
222
  - **Usage**: `create_new_version_of_col_using_LUT(df, keys_col, value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
223
223
  - **Example**: `create_new_version_of_col_using_LUT(df, 'Campaign Name', 'Campaign Type', search_campaign_name_retag_lut, 'Campaign Name New')`
224
224
 
225
225
  ---
226
226
 
227
- ## 26. `convert_df_wide_2_long`
227
+ ## 26. convert_df_wide_2_long
228
228
  - **Description**: Converts a DataFrame from wide to long format.
229
229
  - **Usage**: `convert_df_wide_2_long(df, value_cols, variable_col_name='Stacked', value_col_name='Value')`
230
230
  - **Example**: `convert_df_wide_2_long(df, ['Media Cost', 'Impressions', 'Clicks'], variable_col_name='Metric')`
231
231
 
232
232
  ---
233
233
 
234
- ## 27. `manually_edit_data`
234
+ ## 27. manually_edit_data
235
235
  - **Description**: Enables manual updates to DataFrame cells by applying filters and editing a column.
236
236
  - **Usage**: `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
237
237
  - **Example**: `manually_edit_data(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'}, 'Master Include', 1, change_in_existing_df_col='Yes', new_col_to_change_name='Master Include', manual_edit_col_name='Manual Changes')`
238
238
 
239
239
  ---
240
240
 
241
- ## 28. `format_numbers_with_commas`
241
+ ## 28. format_numbers_with_commas
242
242
  - **Description**: Formats numeric data into numbers with commas and specified decimal places.
243
243
  - **Usage**: `format_numbers_with_commas(df, decimal_length_chosen=2)`
244
244
  - **Example**: `format_numbers_with_commas(df, 1)`
245
245
 
246
246
  ---
247
247
 
248
- ## 29. `filter_df_on_multiple_conditions`
248
+ ## 29. filter_df_on_multiple_conditions
249
249
  - **Description**: Filters a DataFrame based on multiple conditions from a dictionary.
250
250
  - **Usage**: `filter_df_on_multiple_conditions(df, filters_dict)`
251
251
  - **Example**: `filter_df_on_multiple_conditions(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'})`
252
252
 
253
253
  ---
254
254
 
255
- ## 30. `read_and_concatenate_files`
255
+ ## 30. read_and_concatenate_files
256
256
  - **Description**: Reads and concatenates all files of a specified type in a folder.
257
257
  - **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
258
258
  - **Example**: `read_and_concatenate_files(folder_path, file_type='csv')`
259
259
 
260
260
  ---
261
261
 
262
- ## 31. `remove_zero_values`
262
+ ## 31. remove_zero_values
263
263
  - **Description**: Removes rows with zero values in a specified column.
264
264
  - **Usage**: `remove_zero_values(data_frame, column_to_filter)`
265
265
  - **Example**: `remove_zero_values(df, 'Funeral_Delivery')`
266
266
 
267
267
  ---
268
268
 
269
- ## 32. `upgrade_outdated_packages`
269
+ ## 32. upgrade_outdated_packages
270
270
  - **Description**: Upgrades all outdated packages in the environment.
271
271
  - **Usage**: `upgrade_outdated_packages()`
272
272
  - **Example**: `upgrade_outdated_packages()`
273
273
 
274
274
  ---
275
275
 
276
- ## 33. `convert_mixed_formats_dates`
276
+ ## 33. convert_mixed_formats_dates
277
277
  - **Description**: Converts a mix of US and UK date formats to datetime.
278
278
  - **Usage**: `convert_mixed_formats_dates(df, date_col)`
279
279
  - **Example**: `convert_mixed_formats_dates(df, 'OBS')`
280
280
 
281
281
  ---
282
282
 
283
- ## 34. `fill_weekly_date_range`
283
+ ## 34. fill_weekly_date_range
284
284
  - **Description**: Fills in missing weeks with zero values.
285
285
  - **Usage**: `fill_weekly_date_range(df, date_column, freq)`
286
286
  - **Example**: `fill_weekly_date_range(df, 'OBS', 'W-MON')`
287
287
 
288
288
  ---
289
289
 
290
- ## 35. `add_prefix_and_suffix`
290
+ ## 35. add_prefix_and_suffix
291
291
  - **Description**: Adds prefixes and/or suffixes to column headers.
292
292
  - **Usage**: `add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)`
293
293
  - **Example**: `add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')`
294
294
 
295
295
  ---
296
296
 
297
- ## 36. `create_dummies`
297
+ ## 36. create_dummies
298
298
  - **Description**: Converts time series into binary indicators based on a threshold.
299
299
  - **Usage**: `create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')`
300
300
  - **Example**: `create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')`
301
301
 
302
302
  ---
303
303
 
304
- ## 37. `replace_substrings`
304
+ ## 37. replace_substrings
305
305
  - **Description**: Replaces substrings in a column of strings using a dictionary and can change column values to lowercase.
306
306
  - **Usage**: `replace_substrings(df, column, replacements, to_lower=False, new_column=None)`
307
307
  - **Example**: `replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')`
308
308
 
309
309
  ---
310
310
 
311
- ## 38. `add_total_column`
311
+ ## 38. add_total_column
312
312
  - **Description**: Sums all columns (excluding a specified column) to create a total column.
313
313
  - **Usage**: `add_total_column(df, exclude_col=None, total_col_name='Total')`
314
314
  - **Example**: `add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')`
315
315
 
316
316
  ---
317
317
 
318
- ## 39. `apply_lookup_table_based_on_substring`
318
+ ## 39. apply_lookup_table_based_on_substring
319
319
  - **Description**: Maps substrings in a column to values using a lookup dictionary.
320
320
  - **Usage**: `apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')`
321
321
  - **Example**: `apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')`
322
322
 
323
323
  ---
324
324
 
325
- ## 40. `compare_overlap`
325
+ ## 40. compare_overlap
326
326
  - **Description**: Compares matching rows and columns in two DataFrames and outputs the differences.
327
327
  - **Usage**: `compare_overlap(df1, df2, date_col)`
328
328
  - **Example**: `compare_overlap(df_1, df_2, 'obs')`
329
329
 
330
330
  ---
331
331
 
332
- ## 41. `week_commencing_2_week_commencing_conversion`
332
+ ## 41. week_commencing_2_week_commencing_conversion
333
333
  - **Description**: Converts a week commencing column to a different start day.
334
334
  - **Usage**: `week_commencing_2_week_commencing_conversion(df, date_col, week_commencing='sun')`
335
335
  - **Example**: `week_commencing_2_week_commencing_conversion(df, 'obs', week_commencing='mon')`
336
336
 
337
337
  ---
338
338
 
339
- ## 42. `plot_chart`
339
+ ## 42. plot_chart
340
340
  - **Description**: Plots various chart types including line, area, scatter, and bar.
341
341
  - **Usage**: `plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs)`
342
342
  - **Example**: `plot_chart(df, 'obs', df.cols, chart_type='line', title='Spend Over Time', x_title='Date', y_title='Spend')`
343
343
 
344
344
  ---
345
345
 
346
- ## 43. `plot_two_with_common_cols`
346
+ ## 43. plot_two_with_common_cols
347
347
  - **Description**: Plots charts for two DataFrames based on common column names.
348
348
  - **Usage**: `plot_two_with_common_cols(df1, df2, date_column, same_axis=True)`
349
349
  - **Example**: `plot_two_with_common_cols(df_1, df_2, date_column='obs')`
@@ -411,7 +411,7 @@ The **IMSciences package** is a Python library designed to process incoming data
411
411
  Install the IMS package via pip:
412
412
 
413
413
  ```bash
414
- pip install ims-package
414
+ pip install imsciences
415
415
  ```
416
416
 
417
417
  ---
@@ -3,15 +3,15 @@ dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nF
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
4
  imsciences/__init__.py,sha256=7CfK2dMjPnBBw6I4st-20MdMlLjZULviFVXF2eMD9NI,80
5
5
  imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
6
- imsciences/datafunctions.py,sha256=KbZuvjJF-1gydPsb2qFlvpbVLwuG6y-lhLKt-wZ5JDI,156389
6
+ imsciences/datafunctions.py,sha256=XrvJWWFh9gdKAoeIHee2nYi0Z0zPxmW3oB6ICnGTxYc,158444
7
7
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
8
8
  imsciences/unittesting.py,sha256=d9H5HN8y7oof59hqN9mGqkjulExqFd93BEW-X8w_Id8,58142
9
9
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
10
10
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
11
11
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
12
12
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
13
- imsciences-0.8.dist-info/METADATA,sha256=moylR64i_w4kk3TPPZMpFmAPc9f0A4xJgjAY-Zy-Tac,17845
14
- imsciences-0.8.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
15
- imsciences-0.8.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
16
- imsciences-0.8.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
17
- imsciences-0.8.dist-info/RECORD,,
13
+ imsciences-0.8.1.dist-info/METADATA,sha256=sJK90uzVkH6KCDVM3hmkbRyGoXNmie8JMoCVLy4J7Fg,17785
14
+ imsciences-0.8.1.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
15
+ imsciences-0.8.1.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
16
+ imsciences-0.8.1.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
17
+ imsciences-0.8.1.dist-info/RECORD,,