imsciences: 0.8-py3-none-any.whl → 0.8.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +111 -52
- {imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/METADATA +51 -51
- {imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/RECORD +6 -6
- {imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/PKG-INFO-IMS-24Ltp-3 +0 -0
- {imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/WHEEL +0 -0
- {imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED

```diff
@@ -2127,14 +2127,27 @@ class datapull:
 day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
 
 # Create daily date range dataframe starting from start_date
-date_range = pd.date_range(start=pd.to_datetime(start_date), end=datetime.today(), freq="D")
+date_range = pd.date_range(
+    start=pd.to_datetime(start_date),
+    end=datetime.today(),
+    freq="D"
+)
 df_daily = pd.DataFrame(date_range, columns=["Date"])
-
-#
-
+
+# ------------------------------------------------
+# 1. Identify "week_start" for each daily row
+# ------------------------------------------------
+df_daily['week_start'] = df_daily["Date"].apply(
+    lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
+)
+
+# ------------------------------------------------
+# 2. Build a weekly index (df_weekly_start) with dummy columns
+# ------------------------------------------------
 df_weekly_start = df_daily[['week_start']].drop_duplicates().reset_index(drop=True)
 df_weekly_start.rename(columns={'week_start': "Date"}, inplace=True)
 
+# Set index to weekly "start of week"
 df_weekly_start.index = np.arange(1, len(df_weekly_start) + 1)
 df_weekly_start.set_index("Date", inplace=True)
 
```
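The main addition in this hunk is the `week_start` column: each daily row is snapped back to the chosen week-commencing day with modular arithmetic. A standalone sketch of that expression (the `week_start` helper name is ours, for illustration only):

```python
import pandas as pd

day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

def week_start(date, week_commencing):
    # (date.weekday() - target) % 7 counts the days since the most recent
    # week-commencing day, so subtracting it snaps back onto that day.
    return date - pd.Timedelta(days=(date.weekday() - day_dict[week_commencing]) % 7)

# 2024-01-10 is a Wednesday: its Monday-commencing week starts on 2024-01-08,
# its Sunday-commencing week on 2024-01-07.
assert week_start(pd.Timestamp("2024-01-10"), "mon") == pd.Timestamp("2024-01-08")
assert week_start(pd.Timestamp("2024-01-10"), "sun") == pd.Timestamp("2024-01-07")
```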
```diff
@@ -2144,19 +2157,35 @@ class datapull:
 col_name = f"dum_{df_weekly_start.index[i].strftime('%Y_%m_%d')}"
 dummy_columns[col_name] = [0] * len(df_weekly_start)
 dummy_columns[col_name][i] = 1
-
+
 df_dummies = pd.DataFrame(dummy_columns, index=df_weekly_start.index)
 df_weekly_start = pd.concat([df_weekly_start, df_dummies], axis=1)
-
-#
+
+# ------------------------------------------------
+# 3. Public holidays (daily) and specific holiday columns
+# ------------------------------------------------
 for country in countries:
-    country_holidays = holidays.CountryHoliday(
-
-
-
-
+    country_holidays = holidays.CountryHoliday(
+        country,
+        years=range(int(start_date[:4]), datetime.today().year + 1)
+    )
+    # Daily indicator: 1 if that date is a holiday
+    df_daily[f"seas_holiday_{country.lower()}"] = df_daily["Date"].apply(
+        lambda x: 1 if x in country_holidays else 0
+    )
+    # Create columns for specific holiday names
+    for date_hol, name in country_holidays.items():
         col_name = f"seas_{name.replace(' ', '_').lower()}_{country.lower()}"
         if col_name not in df_daily.columns:
             df_daily[col_name] = 0
-        df_daily.loc[df_daily["Date"] == pd.Timestamp(
+        df_daily.loc[df_daily["Date"] == pd.Timestamp(date_hol), col_name] = 1
+
+# ------------------------------------------------
+# 4. Add daily indicators for last day & last Friday of month
+# Then aggregate them to weekly level using .max()
+# ------------------------------------------------
+# Last day of month (daily)
+df_daily["seas_last_day_of_month"] = df_daily["Date"].apply(
+    lambda d: 1 if d == d.to_period("M").to_timestamp("M") else 0
+)
 
```
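The holiday loop now writes daily indicator columns into `df_daily` instead of the weekly frame. A minimal sketch of the same pattern for a single country, assuming the third-party `holidays` package and its "UK" country code:

```python
import pandas as pd
import holidays  # third-party dependency used in the diff

# Holiday calendar for one country over the years of interest.
uk_holidays = holidays.CountryHoliday("UK", years=range(2024, 2026))

df_daily = pd.DataFrame({"Date": pd.date_range("2024-12-23", "2024-12-29", freq="D")})
# 1 if the calendar contains the date, else 0; Timestamps support `in` lookups.
df_daily["seas_holiday_uk"] = df_daily["Date"].apply(
    lambda x: 1 if x in uk_holidays else 0
)
print(df_daily)  # the 2024-12-25 and 2024-12-26 rows are flagged 1
```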
```diff
@@ -2163,12 +2192,15 @@ class datapull:
-#
-
-
-
+# Last Friday of month (daily)
+def is_last_friday(date):
+    # last day of the month
+    last_day_of_month = date.to_period("M").to_timestamp("M")
+    last_day_weekday = last_day_of_month.dayofweek
+    # Determine how many days we go back from the last day to get Friday
+    if last_day_weekday >= 4:
+        days_to_subtract = last_day_weekday - 4
+    else:
+        days_to_subtract = last_day_weekday + 3
+    last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
+    return 1 if date == last_friday else 0
 
-
-df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
-df_monthly_dummies = pd.get_dummies(df_daily, prefix="seas", columns=["Month"], dtype=int)
-df_monthly_dummies['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
-df_monthly_dummies = df_monthly_dummies.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})
-df_monthly_dummies.set_index("Date", inplace=True)
+df_daily["seas_last_friday_of_month"] = df_daily["Date"].apply(is_last_friday)
 
```
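`is_last_friday` moves wholesale from the weekly frame down to `df_daily`; applied per day and later rolled up with `.max()`, a mid-week event can mark its week, which a weekly apply over week-commencing dates rarely could. The helper's two-branch walk-back is equivalent to `(weekday - 4) % 7`. A standalone sketch of that walk-back, anchored on pandas' `MonthEnd` offset rather than the diff's `to_period`/`to_timestamp` round-trip (`last_friday_of_month` is our illustrative name, not the package's):

```python
import pandas as pd

def last_friday_of_month(date):
    # MonthEnd(0) rolls a date forward to the last day of its own month.
    month_end = date + pd.offsets.MonthEnd(0)
    # dayofweek: Mon=0 .. Fri=4. (w - 4) % 7 collapses the diff's if/else:
    # w >= 4 gives w - 4, w < 4 gives w + 3.
    back = (month_end.dayofweek - 4) % 7
    return month_end - pd.Timedelta(days=back)

# January 2025 ends on a Friday; September 2024 ends on Monday the 30th.
assert last_friday_of_month(pd.Timestamp("2025-01-15")) == pd.Timestamp("2025-01-31")
assert last_friday_of_month(pd.Timestamp("2024-09-30")) == pd.Timestamp("2024-09-27")
```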
```diff
@@ -2175,45 +2207,72 @@ class datapull:
-#
-
+# ------------------------------------------------
+# 5. Weekly aggregation for HOLIDAYS & monthly dummies
+# (Using .max() for holiday indicators so they become binary)
+# ------------------------------------------------
+# For monthly dummies, create a daily col "Month", then get_dummies
+df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
+df_monthly_dummies = pd.get_dummies(
+    df_daily,
+    prefix="seas",
+    columns=["Month"],
+    dtype=int
+)
+# Recalculate 'week_start' (already in df_daily, but just to be sure)
+df_monthly_dummies['week_start'] = df_daily['week_start']
+
+# Group monthly dummies by .sum() or .mean()—often we average across the week
+df_monthly_dummies = (
+    df_monthly_dummies
+    .groupby('week_start')
+    .sum(numeric_only=True)  # sum the daily flags
+    .reset_index()
+    .rename(columns={'week_start': "Date"})
+    .set_index("Date")
+)
+# Divide the monthly dummy columns by 7 to spread them across the week
+monthly_cols = [
+    c for c in df_monthly_dummies.columns
+    if c.startswith("seas_month_")
+]
 df_monthly_dummies[monthly_cols] = df_monthly_dummies[monthly_cols] / 7
-
-#
-
-
-
-
+
+# Group holiday columns (and last-day-of-month columns) by .max() => binary
+df_holidays = (
+    df_daily
+    .groupby('week_start')
+    .max(numeric_only=True)  # use max => if any day=1, entire week=1
+    .reset_index()
+    .rename(columns={'week_start': "Date"})
+    .set_index("Date")
+)
+
+# ------------------------------------------------
+# 6. Combine weekly start, monthly dummies, holiday flags
+# ------------------------------------------------
+df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1)
+df_combined = pd.concat([df_combined, df_holidays], axis=1)
 df_combined = df_combined.loc[:, ~df_combined.columns.duplicated()]
 
-#
+# ------------------------------------------------
+# 7. Create weekly dummies for Week of Year & yearly dummies
+# ------------------------------------------------
 df_combined.reset_index(inplace=True)
+df_combined.rename(columns={"index": "old_index"}, inplace=True)  # just in case
+
 df_combined["Week"] = df_combined["Date"].dt.isocalendar().week
 df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Week"], dtype=int)
 
-# Create yearly dummies
 df_combined["Year"] = df_combined["Date"].dt.year
 df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Year"], dtype=int)
 
-#
+# ------------------------------------------------
+# 8. Add constant & trend
+# ------------------------------------------------
 df_combined["Constant"] = 1
-
-# Add trend
 df_combined["Trend"] = df_combined.index + 1
-
-# Create seasonal indicators for the last day and last Friday of the month
-df_combined['seas_last_day_of_month'] = df_combined["Date"].apply(lambda x: 1 if x == x.to_period('M').to_timestamp('M') else 0)
-
-def is_last_friday(date):
-    last_day_of_month = date.to_period('M').to_timestamp('M')
-    last_day_weekday = last_day_of_month.dayofweek
-    if last_day_weekday >= 4:
-        days_to_subtract = last_day_weekday - 4
-    else:
-        days_to_subtract = last_day_weekday + 3
-    last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
-    return 1 if date == last_friday else 0
-
-df_combined['seas_last_friday_of_month'] = df_combined["Date"].apply(is_last_friday)
 
-#
+# ------------------------------------------------
+# 9. Rename Date -> OBS and return
+# ------------------------------------------------
 df_combined.rename(columns={"Date": "OBS"}, inplace=True)
 
 return df_combined
```
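The aggregation split is the crux of this hunk: binary flags take the week's `.max()`, so any flagged day marks the whole week, while the daily month dummies are summed and divided by 7, so a week straddling two months is shared fractionally between them. A toy illustration with made-up data (column names mirror the diff's `seas_` convention):

```python
import pandas as pd

df_daily = pd.DataFrame({
    "week_start": pd.to_datetime(["2024-01-01"] * 7 + ["2024-01-08"] * 7),
    "seas_holiday_uk": [1, 0, 0, 0, 0, 0, 0] + [0] * 7,  # one holiday in week 1
    "seas_month_january": [1] * 14,                       # daily month dummy
})

# Flags: .max() gives 1 if any day in the week is flagged.
print(df_daily.groupby("week_start")["seas_holiday_uk"].max().tolist())  # [1, 0]

# Month dummies: .sum() / 7 gives the fraction of the week in that month.
print((df_daily.groupby("week_start")["seas_month_january"].sum() / 7).tolist())  # [1.0, 1.0]
```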
{imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/METADATA
CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.8
+Version: 0.8.1
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com
@@ -35,97 +35,97 @@ The **IMSciences package** is a Python library designed to process incoming data
 
 ---
 
-
+Table of Contents
+=================
 
-1.
-2.
-3.
-4.
-5.
+1. `Data Processing <#data-processing>`_
+2. `Data Pulling <#data-pulling>`_
+3. `Installation <#installation>`_
+4. `Usage <#usage>`_
+5. `License <#license>`_
 
 ---
 
 ## Data Processing
 
-
-## 1. `get_wd_levels`
+## 1. get_wd_levels
 - **Description**: Get the working directory with the option of moving up parents.
 - **Usage**: `get_wd_levels(levels)`
 - **Example**: `get_wd_levels(0)`
 
 ---
 
-## 2.
+## 2. remove_rows
 - **Description**: Removes a specified number of rows from a pandas DataFrame.
 - **Usage**: `remove_rows(data_frame, num_rows_to_remove)`
 - **Example**: `remove_rows(df, 2)`
 
 ---
 
-## 3.
+## 3. aggregate_daily_to_wc_long
 - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
 - **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum')`
 - **Example**: `aggregate_daily_to_wc_long(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average')`
 
 ---
 
-## 4.
+## 4. convert_monthly_to_daily
 - **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
 - **Usage**: `convert_monthly_to_daily(df, date_column, divide)`
 - **Example**: `convert_monthly_to_daily(df, 'date')`
 
 ---
 
-## 5.
+## 5. plot_two
 - **Description**: Plots specified columns from two different DataFrames using a shared date column. Useful for comparing data.
 - **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
 - **Example**: `plot_two(df1, 'cost', df2, 'cost', 'obs', True)`
 
 ---
 
-## 6.
+## 6. remove_nan_rows
 - **Description**: Removes rows from a DataFrame where the specified column has NaN values.
 - **Usage**: `remove_nan_rows(df, col_to_remove_rows)`
 - **Example**: `remove_nan_rows(df, 'date')`
 
 ---
 
-## 7.
+## 7. filter_rows
 - **Description**: Filters the DataFrame based on whether the values in a specified column are in a provided list.
 - **Usage**: `filter_rows(df, col_to_filter, list_of_filters)`
 - **Example**: `filter_rows(df, 'country', ['UK', 'IE'])`
 
 ---
 
-## 8.
+## 8. plot_one
 - **Description**: Plots a specified column from a DataFrame.
 - **Usage**: `plot_one(df1, col1, date_column)`
 - **Example**: `plot_one(df, 'Spend', 'OBS')`
 
 ---
 
-## 9.
+## 9. week_of_year_mapping
 - **Description**: Converts a week column in `yyyy-Www` or `yyyy-ww` format to week commencing date.
 - **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
 - **Example**: `week_of_year_mapping(df, 'week', 'mon')`
 
 ---
 
-## 10.
+## 10. exclude_rows
 - **Description**: Removes rows from a DataFrame based on whether the values in a specified column are not in a provided list.
 - **Usage**: `exclude_rows(df, col_to_filter, list_of_filters)`
 - **Example**: `exclude_rows(df, 'week', ['2022-W20', '2022-W21'])`
 
 ---
 
-## 11.
+## 11. rename_cols
 - **Description**: Renames columns in a pandas DataFrame.
 - **Usage**: `rename_cols(df, name)`
 - **Example**: `rename_cols(df, 'ame_facebook')`
 
 ---
 
-## 12.
+## 12. merge_new_and_old
 - **Description**: Creates a new DataFrame with two columns: one for dates and one for merged numeric values.
 - Merges numeric values from specified columns in the old and new DataFrames based on a given cutoff date.
 - **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
@@ -133,21 +133,21 @@ The **IMSciences package** is a Python library designed to process incoming data
 
 ---
 
-## 13.
+## 13. merge_dataframes_on_date
 - **Description**: Merge a list of DataFrames on a common column.
 - **Usage**: `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
 - **Example**: `merge_dataframes_on_date([df1, df2, df3], common_column='OBS', merge_how='outer')`
 
 ---
 
-## 14.
+## 14. merge_and_update_dfs
 - **Description**: Merges two dataframes on a key column, updates the first dataframe's columns with the second's where available, and returns a dataframe sorted by the key column.
 - **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
 - **Example**: `merge_and_update_dfs(processed_facebook, finalised_meta, 'OBS')`
 
 ---
 
-## 15.
+## 15. convert_us_to_uk_dates
 - **Description**: Convert a DataFrame column with mixed date formats to datetime.
 - **Usage**: `convert_us_to_uk_dates(df, date_col)`
 - **Example**: `convert_us_to_uk_dates(df, 'date')`
@@ -161,189 +161,189 @@ The **IMSciences package** is a Python library designed to process incoming data
 
 ---
 
-## 17.
+## 17. pivot_table
 - **Description**: Dynamically pivots a DataFrame based on specified columns.
 - **Usage**: `pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=False, fill_missing_weekly_dates=False, week_commencing='W-MON')`
 - **Example**: `pivot_table(df, 'OBS', 'Channel Short Names', 'Value', filters_dict={'Master Include': ' == 1', 'OBS': ' >= datetime(2019,9,9)', 'Metric Short Names': ' == spd'}, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=True, fill_missing_weekly_dates=True, week_commencing='W-MON')`
 
 ---
 
-## 18.
+## 18. apply_lookup_table_for_columns
 - **Description**: Equivalent of XLOOKUP in Excel. Allows mapping of a dictionary of substrings within a column.
 - **Usage**: `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')`
 - **Example**: `apply_lookup_table_for_columns(df, col_names, {'spend': 'spd', 'clicks': 'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')`
 
 ---
 
-## 19.
+## 19. aggregate_daily_to_wc_wide
 - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
 - **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
 - **Example**: `aggregate_daily_to_wc_wide(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average', True)`
 
 ---
 
-## 20.
+## 20. merge_cols_with_seperator
 - **Description**: Merges multiple columns in a DataFrame into one column with a separator `_`. Useful for lookup tables.
 - **Usage**: `merge_cols_with_seperator(df, col_names, seperator='_', output_column_name='Merged', starting_prefix_str=None, ending_prefix_str=None)`
 - **Example**: `merge_cols_with_seperator(df, ['Campaign', 'Product'], seperator='|', output_column_name='Merged Columns', starting_prefix_str='start_', ending_prefix_str='_end')`
 
 ---
 
-## 21.
+## 21. check_sum_of_df_cols_are_equal
 - **Description**: Checks if the sum of two columns in two DataFrames are the same, and provides the sums and differences.
 - **Usage**: `check_sum_of_df_cols_are_equal(df_1, df_2, cols_1, cols_2)`
 - **Example**: `check_sum_of_df_cols_are_equal(df_1, df_2, 'Media Cost', 'Spend')`
 
 ---
 
-## 22.
+## 22. convert_2_df_cols_to_dict
 - **Description**: Creates a dictionary using two columns in a DataFrame.
 - **Usage**: `convert_2_df_cols_to_dict(df, key_col, value_col)`
 - **Example**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`
 
 ---
 
-## 23.
+## 23. create_FY_and_H_columns
 - **Description**: Creates financial year, half-year, and financial half-year columns.
 - **Usage**: `create_FY_and_H_columns(df, index_col, start_date, starting_FY, short_format='No', half_years='No', combined_FY_and_H='No')`
 - **Example**: `create_FY_and_H_columns(df, 'Week (M-S)', '2022-10-03', 'FY2023', short_format='Yes', half_years='Yes', combined_FY_and_H='Yes')`
 
 ---
 
-## 24.
+## 24. keyword_lookup_replacement
 - **Description**: Updates chosen values in a specified column of the DataFrame based on a lookup dictionary.
 - **Usage**: `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict, output_column_name='Updated Column')`
 - **Example**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel', 'segment', 'product'], qlik_dict_for_channel, output_column_name='Channel New')`
 
 ---
 
-## 25.
+## 25. create_new_version_of_col_using_LUT
 - **Description**: Creates a new column in a DataFrame by mapping values from an old column using a lookup table.
 - **Usage**: `create_new_version_of_col_using_LUT(df, keys_col, value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
 - **Example**: `create_new_version_of_col_using_LUT(df, 'Campaign Name', 'Campaign Type', search_campaign_name_retag_lut, 'Campaign Name New')`
 
 ---
 
-## 26.
+## 26. convert_df_wide_2_long
 - **Description**: Converts a DataFrame from wide to long format.
 - **Usage**: `convert_df_wide_2_long(df, value_cols, variable_col_name='Stacked', value_col_name='Value')`
 - **Example**: `convert_df_wide_2_long(df, ['Media Cost', 'Impressions', 'Clicks'], variable_col_name='Metric')`
 
 ---
 
-## 27.
+## 27. manually_edit_data
 - **Description**: Enables manual updates to DataFrame cells by applying filters and editing a column.
 - **Usage**: `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
 - **Example**: `manually_edit_data(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'}, 'Master Include', 1, change_in_existing_df_col='Yes', new_col_to_change_name='Master Include', manual_edit_col_name='Manual Changes')`
 
 ---
 
-## 28.
+## 28. format_numbers_with_commas
 - **Description**: Formats numeric data into numbers with commas and specified decimal places.
 - **Usage**: `format_numbers_with_commas(df, decimal_length_chosen=2)`
 - **Example**: `format_numbers_with_commas(df, 1)`
 
 ---
 
-## 29.
+## 29. filter_df_on_multiple_conditions
 - **Description**: Filters a DataFrame based on multiple conditions from a dictionary.
 - **Usage**: `filter_df_on_multiple_conditions(df, filters_dict)`
 - **Example**: `filter_df_on_multiple_conditions(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'})`
 
 ---
 
-## 30.
+## 30. read_and_concatenate_files
 - **Description**: Reads and concatenates all files of a specified type in a folder.
 - **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
 - **Example**: `read_and_concatenate_files(folder_path, file_type='csv')`
 
 ---
 
-## 31.
+## 31. remove_zero_values
 - **Description**: Removes rows with zero values in a specified column.
 - **Usage**: `remove_zero_values(data_frame, column_to_filter)`
 - **Example**: `remove_zero_values(df, 'Funeral_Delivery')`
 
 ---
 
-## 32.
+## 32. upgrade_outdated_packages
 - **Description**: Upgrades all outdated packages in the environment.
 - **Usage**: `upgrade_outdated_packages()`
 - **Example**: `upgrade_outdated_packages()`
 
 ---
 
-## 33.
+## 33. convert_mixed_formats_dates
 - **Description**: Converts a mix of US and UK date formats to datetime.
 - **Usage**: `convert_mixed_formats_dates(df, date_col)`
 - **Example**: `convert_mixed_formats_dates(df, 'OBS')`
 
 ---
 
-## 34.
+## 34. fill_weekly_date_range
 - **Description**: Fills in missing weeks with zero values.
 - **Usage**: `fill_weekly_date_range(df, date_column, freq)`
 - **Example**: `fill_weekly_date_range(df, 'OBS', 'W-MON')`
 
 ---
 
-## 35.
+## 35. add_prefix_and_suffix
 - **Description**: Adds prefixes and/or suffixes to column headers.
 - **Usage**: `add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)`
 - **Example**: `add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')`
 
 ---
 
-## 36.
+## 36. create_dummies
 - **Description**: Converts time series into binary indicators based on a threshold.
 - **Usage**: `create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')`
 - **Example**: `create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')`
 
 ---
 
-## 37.
+## 37. replace_substrings
 - **Description**: Replaces substrings in a column of strings using a dictionary and can change column values to lowercase.
 - **Usage**: `replace_substrings(df, column, replacements, to_lower=False, new_column=None)`
 - **Example**: `replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')`
 
 ---
 
-## 38. `add_total_column
+## 38. `add_total_column
 - **Description**: Sums all columns (excluding a specified column) to create a total column.
 - **Usage**: `add_total_column(df, exclude_col=None, total_col_name='Total')`
 - **Example**: `add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')`
 
 ---
 
-## 39.
+## 39. apply_lookup_table_based_on_substring
 - **Description**: Maps substrings in a column to values using a lookup dictionary.
 - **Usage**: `apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')`
 - **Example**: `apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')`
 
 ---
 
-## 40.
+## 40. compare_overlap
 - **Description**: Compares matching rows and columns in two DataFrames and outputs the differences.
 - **Usage**: `compare_overlap(df1, df2, date_col)`
 - **Example**: `compare_overlap(df_1, df_2, 'obs')`
 
 ---
 
-## 41.
+## 41. week_commencing_2_week_commencing_conversion
 - **Description**: Converts a week commencing column to a different start day.
 - **Usage**: `week_commencing_2_week_commencing_conversion(df, date_col, week_commencing='sun')`
 - **Example**: `week_commencing_2_week_commencing_conversion(df, 'obs', week_commencing='mon')`
 
 ---
 
-## 42.
+## 42. plot_chart
 - **Description**: Plots various chart types including line, area, scatter, and bar.
 - **Usage**: `plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs)`
 - **Example**: `plot_chart(df, 'obs', df.cols, chart_type='line', title='Spend Over Time', x_title='Date', y_title='Spend')`
 
 ---
 
-## 43.
+## 43. plot_two_with_common_cols
 - **Description**: Plots charts for two DataFrames based on common column names.
 - **Usage**: `plot_two_with_common_cols(df1, df2, date_column, same_axis=True)`
 - **Example**: `plot_two_with_common_cols(df_1, df_2, date_column='obs')`
@@ -411,7 +411,7 @@ The **IMSciences package** is a Python library designed to process incoming data
 Install the IMS package via pip:
 
 ```bash
-pip install
+pip install imsciences
 ```
 
 ---
````
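For orientation, a hypothetical quick start against the functions listed above. The import path is an assumption on our part (the wheel ships `imsciences/datafunctions.py`, but whether these helpers are module-level or live on a class is not shown in this diff), so treat it as a sketch:

```python
import pandas as pd
from imsciences import datafunctions as ims  # assumed entry point, not verified

df = pd.DataFrame({
    "date": pd.date_range("2024-01-01", periods=14, freq="D"),
    "platform": ["meta"] * 14,
    "cost": range(14),
})
# Call shape taken from the README's aggregate_daily_to_wc_long entry above.
weekly = ims.aggregate_daily_to_wc_long(df, "date", ["platform"], ["cost"], "mon")
```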
{imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/RECORD
CHANGED

```diff
@@ -3,15 +3,15 @@ dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nF
 dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsciences/__init__.py,sha256=7CfK2dMjPnBBw6I4st-20MdMlLjZULviFVXF2eMD9NI,80
 imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
-imsciences/datafunctions.py,sha256=
+imsciences/datafunctions.py,sha256=XrvJWWFh9gdKAoeIHee2nYi0Z0zPxmW3oB6ICnGTxYc,158444
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
 imsciences/unittesting.py,sha256=d9H5HN8y7oof59hqN9mGqkjulExqFd93BEW-X8w_Id8,58142
 imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.8.dist-info/METADATA,sha256=
-imsciences-0.8.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
-imsciences-0.8.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
-imsciences-0.8.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
-imsciences-0.8.dist-info/RECORD,,
+imsciences-0.8.1.dist-info/METADATA,sha256=sJK90uzVkH6KCDVM3hmkbRyGoXNmie8JMoCVLy4J7Fg,17785
+imsciences-0.8.1.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
+imsciences-0.8.1.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
+imsciences-0.8.1.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.8.1.dist-info/RECORD,,
```
{imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/PKG-INFO-IMS-24Ltp-3
File without changes

{imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/WHEEL
File without changes

{imsciences-0.8.dist-info → imsciences-0.8.1.dist-info}/top_level.txt
File without changes