imsciences 0.5.4.7__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/__init__.py +4 -1
- imsciences/datafunctions-IMS-24Ltp-3.py +2711 -0
- imsciences/datafunctions.py +2842 -170
- imsciences/datapull.py +374 -0
- imsciences/geo.py +195 -0
- imsciences/mmm.py +1415 -0
- imsciences/pull.py +1483 -0
- imsciences/unittesting.py +1064 -0
- imsciences/vis.py +196 -0
- imsciences-0.9.3.dist-info/LICENSE.txt +21 -0
- imsciences-0.9.3.dist-info/METADATA +330 -0
- imsciences-0.9.3.dist-info/PKG-INFO-IMS-24Ltp-3 +24 -0
- imsciences-0.9.3.dist-info/RECORD +22 -0
- {imsciences-0.5.4.7.dist-info → imsciences-0.9.3.dist-info}/WHEEL +1 -1
- imsciences-0.5.4.7.dist-info/METADATA +0 -95
- imsciences-0.5.4.7.dist-info/RECORD +0 -13
- {imsciences-0.5.4.7.dist-info → imsciences-0.9.3.dist-info}/top_level.txt +0 -0
imsciences/vis.py
ADDED
@@ -0,0 +1,196 @@
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objs as go
+
+class datavis:
+
+    def help(self):
+        """
+        Displays a help menu listing all the available functions with their descriptions, usage, and examples.
+        """
+        print("1. plot_one")
+        print(" - Description: Plots a specified column from a DataFrame with white background and black axes.")
+        print(" - Usage: plot_one(df1, col1, date_column)")
+        print(" - Example: plot_one(df, 'sales', 'date')\n")
+
+        print("2. plot_two")
+        print(" - Description: Plots specified columns from two DataFrames, optionally on the same or separate y-axes.")
+        print(" - Usage: plot_two(df1, col1, df2, col2, date_column, same_axis=True)")
+        print(" - Example: plot_two(df1, 'sales_vol', df2, 'sales_revenue', 'date', same_axis=False)\n")
+
+        print("3. plot_chart")
+        print(" - Description: Plots various chart types using Plotly, including line, bar, scatter, area, pie, etc.")
+        print(" - Usage: plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values')")
+        print(" - Example: plot_chart(df, 'date', ['sales', 'revenue'], chart_type='line', title='Sales and Revenue')\n")
+
+    def plot_one(self, df1, col1, date_column):
+        """
+        Plots a specified column from a DataFrame with a white background and black axes,
+        using a specified date column as the X-axis.
+
+        :param df1: DataFrame
+        :param col1: Column name from the DataFrame
+        :param date_column: The name of the date column to use for the X-axis
+        """
+        # Check if columns exist in the DataFrame
+        if col1 not in df1.columns or date_column not in df1.columns:
+            raise ValueError("Column not found in DataFrame")
+
+        # Check if the date column is in datetime format; if not, convert it
+        if not pd.api.types.is_datetime64_any_dtype(df1[date_column]):
+            try:
+                # Convert with dayfirst=True to interpret dates correctly
+                df1[date_column] = pd.to_datetime(df1[date_column], dayfirst=True)
+            except Exception as e:
+                raise ValueError(f"Error converting {date_column} to datetime: {e}")
+
+        # Plotting using Plotly Express
+        fig = px.line(df1, x=date_column, y=col1)
+
+        # Update layout for a white background and black axis lines, and set the y-axis to start at 0
+        fig.update_layout(
+            plot_bgcolor='white',
+            xaxis=dict(
+                showline=True,
+                linecolor='black'
+            ),
+            yaxis=dict(
+                showline=True,
+                linecolor='black',
+                rangemode='tozero'  # Start the y-axis at 0 where suitable
+            )
+        )
+
+        return fig
+
+    def plot_two(self, df1, col1, df2, col2, date_column, same_axis=True):
+        """
+        Plots specified columns from two DataFrames, whether or not they are the same length,
+        using a specified date column as the X-axis, charting on either the same or separate y-axes.
+
+        :param df1: First DataFrame
+        :param col1: Column name from the first DataFrame
+        :param df2: Second DataFrame
+        :param col2: Column name from the second DataFrame
+        :param date_column: The name of the date column to use for the X-axis
+        :param same_axis: If True, plot both traces on the same y-axis; otherwise, use separate y-axes.
+        :return: Plotly figure
+        """
+        # Validate inputs
+        if col1 not in df1.columns or date_column not in df1.columns:
+            raise ValueError(f"Column {col1} or {date_column} not found in the first DataFrame.")
+        if col2 not in df2.columns or date_column not in df2.columns:
+            raise ValueError(f"Column {col2} or {date_column} not found in the second DataFrame.")
+
+        # Ensure date columns are in datetime format
+        df1[date_column] = pd.to_datetime(df1[date_column], errors='coerce')
+        df2[date_column] = pd.to_datetime(df2[date_column], errors='coerce')
+
+        # Drop rows with invalid dates
+        df1 = df1.dropna(subset=[date_column])
+        df2 = df2.dropna(subset=[date_column])
+
+        # Create traces for the first and second DataFrames
+        trace1 = go.Scatter(x=df1[date_column], y=df1[col1], mode='lines', name=col1, yaxis='y1')
+
+        if same_axis:
+            trace2 = go.Scatter(x=df2[date_column], y=df2[col2], mode='lines', name=col2, yaxis='y1')
+        else:
+            trace2 = go.Scatter(x=df2[date_column], y=df2[col2], mode='lines', name=col2, yaxis='y2')
+
+        # Define layout for the plot
+        layout = go.Layout(
+            title="Comparison Plot",
+            xaxis=dict(title=date_column, showline=True, linecolor='black'),
+            yaxis=dict(
+                title=col1 if same_axis else f"{col1} (y1)",
+                showline=True,
+                linecolor='black',
+                rangemode='tozero'
+            ),
+            yaxis2=dict(
+                title=f"{col2} (y2)" if not same_axis else "",
+                overlaying='y',
+                side='right',
+                showline=True,
+                linecolor='black',
+                rangemode='tozero'
+            ),
+            showlegend=True,
+            plot_bgcolor='white'  # Set the plot background color to white
+        )
+
+        # Create the figure with the defined layout and traces
+        fig = go.Figure(data=[trace1, trace2], layout=layout)
+
+        return fig
+
+    def plot_chart(self, df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs):
+        """
+        Plot various types of charts using Plotly.
+
+        Args:
+            df (pandas.DataFrame): DataFrame containing the data.
+            date_col (str): The name of the column with date information.
+            value_cols (list): List of columns to plot.
+            chart_type (str): Type of chart to plot ('line', 'bar', 'scatter', etc.).
+            title (str): Title of the chart.
+            x_title (str): Title of the x-axis.
+            y_title (str): Title of the y-axis.
+            **kwargs: Additional keyword arguments for customization.
+
+        Returns:
+            plotly.graph_objects.Figure: The Plotly figure object.
+        """
+        import pandas as pd
+        import plotly.graph_objects as go
+
+        # Ensure the date column is in datetime format
+        df[date_col] = pd.to_datetime(df[date_col])
+
+        # Validate input columns
+        value_cols = [col for col in value_cols if col in df.columns and col != date_col]
+        if not value_cols:
+            raise ValueError("No valid columns provided for plotting.")
+
+        # Initialize the figure
+        fig = go.Figure()
+
+        # Map chart types to the corresponding Plotly trace constructors
+        chart_trace_map = {
+            'line': lambda col: go.Scatter(x=df[date_col], y=df[col], mode='lines', name=col, **kwargs),
+            'bar': lambda col: go.Bar(x=df[date_col], y=df[col], name=col, **kwargs),
+            'scatter': lambda col: go.Scatter(x=df[date_col], y=df[col], mode='markers', name=col, **kwargs),
+            'area': lambda col: go.Scatter(x=df[date_col], y=df[col], mode='lines', fill='tozeroy', name=col, **kwargs),
+            'pie': lambda col: go.Pie(labels=df[date_col], values=df[col], name=col, **kwargs),
+            'box': lambda col: go.Box(y=df[col], name=col, **kwargs),
+            'bubble': lambda _: go.Scatter(
+                x=df[value_cols[0]], y=df[value_cols[1]], mode='markers',
+                marker=dict(size=df[value_cols[2]]), name='Bubble Chart', **kwargs
+            ),
+            'funnel': lambda col: go.Funnel(y=df[date_col], x=df[col], **kwargs),
+            'waterfall': lambda col: go.Waterfall(x=df[date_col], y=df[col], measure=df[value_cols[1]], **kwargs),
+            'scatter3d': lambda _: go.Scatter3d(
+                x=df[value_cols[0]], y=df[value_cols[1]], z=df[value_cols[2]],
+                mode='markers', **kwargs
+            )
+        }
+
+        # Generate traces for the selected chart type
+        if chart_type in chart_trace_map:
+            for col in value_cols:
+                trace = chart_trace_map[chart_type](col)
+                fig.add_trace(trace)
+        else:
+            raise ValueError(f"Unsupported chart type: {chart_type}")
+
+        # Update the layout of the figure
+        fig.update_layout(
+            title=title,
+            xaxis_title=x_title,
+            yaxis_title=y_title,
+            legend_title='Series',
+            template='plotly_dark'
+        )
+
+        return fig
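A minimal usage sketch for the new `datavis` class. The DataFrame and column names below are illustrative, and the import is an assumption based on the README's usage section rather than the package's only entry point:

```python
import pandas as pd
from imsciences import datavis  # assumed export, per the README's usage section

# Illustrative weekly data; column names are hypothetical
df = pd.DataFrame({
    "date": pd.date_range("2024-01-01", periods=8, freq="W-MON"),
    "sales": [120, 135, 128, 150, 160, 155, 170, 165],
})

ims_vis = datavis()
fig = ims_vis.plot_one(df, "sales", "date")  # white background, y-axis anchored at 0
fig.show()
```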
imsciences-0.9.3.dist-info/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Independent Marketing Sciences
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
imsciences-0.9.3.dist-info/METADATA
ADDED
@@ -0,0 +1,330 @@
+Metadata-Version: 2.1
+Name: imsciences
+Version: 0.9.3
+Summary: IMS Data Processing Package
+Author: IMS
+Author-email: cam@im-sciences.com
+Keywords: python,data processing,apis
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: Unix
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Operating System :: Microsoft :: Windows
+Description-Content-Type: text/markdown
+License-File: LICENSE.txt
+Requires-Dist: pandas
+Requires-Dist: plotly
+Requires-Dist: numpy
+Requires-Dist: fredapi
+Requires-Dist: bs4
+Requires-Dist: yfinance
+Requires-Dist: holidays
+Requires-Dist: google-analytics-data
+Requires-Dist: geopandas
+
+# IMS Package Documentation
+
+The **Independent Marketing Sciences** package is a Python library designed to process incoming data into a format tailored for projects, particularly those utilising weekly time series data. This package offers a suite of functions for efficient data collection, manipulation, visualisation and analysis.
+
+---
+
+## Key Features
+- Seamless data processing for time series workflows.
+- Aggregation, filtering, and transformation of time series data.
+- Visualisation of data.
+- Integration with external data sources such as FRED, Bank of England, ONS and OECD.
+
+---
+
+Table of Contents
+=================
+
+1. [Data Processing for Time Series](#data-processing-for-time-series)
+2. [Data Processing for Incrementality Testing](#data-processing-for-incrementality-testing)
+3. [Data Visualisations](#data-visualisations)
+4. [Data Pulling](#data-pulling)
+5. [Installation](#installation)
+6. [Usage](#usage)
+7. [License](#license)
+
+---
+
+## Data Processing for Time Series
+
+## 1. `get_wd_levels`
+- **Description**: Gets the working directory, with the option of moving up through parent directories.
+- **Usage**: `get_wd_levels(levels)`
+- **Example**: `get_wd_levels(0)`
+
+## 2. `aggregate_daily_to_wc_long`
+- **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
+- **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum')`
+- **Example**: `aggregate_daily_to_wc_long(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average')`
+
+## 3. `convert_monthly_to_daily`
+- **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
+- **Usage**: `convert_monthly_to_daily(df, date_column, divide=True)`
+- **Example**: `convert_monthly_to_daily(df, 'date')`
+
+## 4. `week_of_year_mapping`
+- **Description**: Converts a week column in 'yyyy-Www' or 'yyyy-ww' format to a week-commencing date.
+- **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
+- **Example**: `week_of_year_mapping(df, 'week', 'mon')`
+
+## 5. `rename_cols`
+- **Description**: Renames columns in a pandas DataFrame with a specified prefix or format.
+- **Usage**: `rename_cols(df, name='ame_')`
+- **Example**: `rename_cols(df, 'ame_facebook')`
+
+## 6. `merge_new_and_old`
+- **Description**: Creates a new DataFrame by merging old and new DataFrames based on a cutoff date.
+- **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
+- **Example**: `merge_new_and_old(df1, 'old_col', df2, 'new_col', '2023-01-15')`
+
+## 7. `merge_dataframes_on_column`
+- **Description**: Merges a list of DataFrames on a common column.
+- **Usage**: `merge_dataframes_on_column(dataframes, common_column='OBS', merge_how='outer')`
+- **Example**: `merge_dataframes_on_column([df1, df2, df3], common_column='OBS', merge_how='outer')`
+
+## 8. `merge_and_update_dfs`
+- **Description**: Merges two DataFrames, updating columns from the second DataFrame where values are available.
+- **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
+- **Example**: `merge_and_update_dfs(processed_facebook, finalised_meta, 'OBS')`
+
+## 9. `convert_us_to_uk_dates`
+- **Description**: Converts a DataFrame column with mixed US and UK date formats to datetime.
+- **Usage**: `convert_us_to_uk_dates(df, date_col)`
+- **Example**: `convert_us_to_uk_dates(df, 'date')`
+
+## 10. `combine_sheets`
+- **Description**: Combines multiple DataFrames from a dictionary into a single DataFrame.
+- **Usage**: `combine_sheets(all_sheets)`
+- **Example**: `combine_sheets({'Sheet1': df1, 'Sheet2': df2})`
+
+## 11. `pivot_table`
+- **Description**: Dynamically pivots a DataFrame based on specified columns.
+- **Usage**: `pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=False, fill_missing_weekly_dates=False, week_commencing='W-MON')`
+- **Example**: `pivot_table(df, 'OBS', 'Channel Short Names', 'Value', filters_dict={'Master Include': ' == 1'}, fill_value=0)`
+
+## 12. `apply_lookup_table_for_columns`
+- **Description**: Maps substrings in columns to new values based on a dictionary.
+- **Usage**: `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')`
+- **Example**: `apply_lookup_table_for_columns(df, col_names, {'spend': 'spd'}, if_not_in_dict='Other', new_column_name='Metrics Short')`
+
+## 13. `aggregate_daily_to_wc_wide`
+- **Description**: Aggregates daily data into weekly data and pivots it to wide format.
+- **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc='sun', aggregation='sum', include_totals=False)`
+- **Example**: `aggregate_daily_to_wc_wide(df, 'date', ['platform'], ['cost', 'impressions'], 'mon', 'average', True)`
+
+## 14. `merge_cols_with_seperator`
+- **Description**: Merges multiple columns in a DataFrame into one column with a specified separator.
+- **Usage**: `merge_cols_with_seperator(df, col_names, separator='_', output_column_name='Merged')`
+- **Example**: `merge_cols_with_seperator(df, ['Campaign', 'Product'], separator='|', output_column_name='Merged Columns')`
+
+## 15. `check_sum_of_df_cols_are_equal`
+- **Description**: Checks whether the sums of two columns in two DataFrames are equal and reports the difference.
+- **Usage**: `check_sum_of_df_cols_are_equal(df_1, df_2, cols_1, cols_2)`
+- **Example**: `check_sum_of_df_cols_are_equal(df_1, df_2, 'Media Cost', 'Spend')`
+
+## 16. `convert_2_df_cols_to_dict`
+- **Description**: Creates a dictionary from two DataFrame columns.
+- **Usage**: `convert_2_df_cols_to_dict(df, key_col, value_col)`
+- **Example**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`
+
+## 17. `create_FY_and_H_columns`
+- **Description**: Adds financial-year and half-year columns to a DataFrame based on a start date.
+- **Usage**: `create_FY_and_H_columns(df, index_col, start_date, starting_FY, short_format='No', half_years='No', combined_FY_and_H='No')`
+- **Example**: `create_FY_and_H_columns(df, 'Week', '2022-10-03', 'FY2023', short_format='Yes')`
+
+## 18. `keyword_lookup_replacement`
+- **Description**: Updates values in a column based on a lookup dictionary with conditional logic.
+- **Usage**: `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict, output_column_name='Updated Column')`
+- **Example**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel', 'segment'], lookup_dict, output_column_name='Channel New')`
+
+## 19. `create_new_version_of_col_using_LUT`
+- **Description**: Creates a new column based on a lookup table applied to an existing column.
+- **Usage**: `create_new_version_of_col_using_LUT(df, keys_col, value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
+- **Example**: `create_new_version_of_col_using_LUT(df, 'Campaign Name', 'Campaign Type', lookup_dict)`
+
+## 20. `convert_df_wide_2_long`
+- **Description**: Converts a wide-format DataFrame into a long-format DataFrame.
+- **Usage**: `convert_df_wide_2_long(df, value_cols, variable_col_name='Stacked', value_col_name='Value')`
+- **Example**: `convert_df_wide_2_long(df, ['col1', 'col2'], variable_col_name='Var', value_col_name='Val')`
+
+## 21. `manually_edit_data`
+- **Description**: Manually updates specified cells in a DataFrame based on filters.
+- **Usage**: `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
+- **Example**: `manually_edit_data(df, {'col1': '== 1'}, 'col2', 'new_val', add_notes='Yes', note='Manual Update')`
+
+## 22. `format_numbers_with_commas`
+- **Description**: Formats numerical columns with commas and a specified number of decimal places.
+- **Usage**: `format_numbers_with_commas(df, decimal_length_chosen=2)`
+- **Example**: `format_numbers_with_commas(df, decimal_length_chosen=1)`
+
+## 23. `filter_df_on_multiple_conditions`
+- **Description**: Filters a DataFrame based on multiple column conditions.
+- **Usage**: `filter_df_on_multiple_conditions(df, filters_dict)`
+- **Example**: `filter_df_on_multiple_conditions(df, {'col1': '>= 5', 'col2': "== 'val'"})`
+
+## 24. `read_and_concatenate_files`
+- **Description**: Reads and concatenates files from a specified folder into a single DataFrame.
+- **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
+- **Example**: `read_and_concatenate_files('/path/to/files', file_type='xlsx')`
+
+## 25. `upgrade_outdated_packages`
+- **Description**: Upgrades all outdated Python packages except specified ones.
+- **Usage**: `upgrade_outdated_packages(exclude_packages=['twine'])`
+- **Example**: `upgrade_outdated_packages(exclude_packages=['pip', 'setuptools'])`
+
+## 26. `convert_mixed_formats_dates`
+- **Description**: Converts mixed-format date columns into a standardised datetime format.
+- **Usage**: `convert_mixed_formats_dates(df, column_name)`
+- **Example**: `convert_mixed_formats_dates(df, 'date_col')`
+
+## 27. `fill_weekly_date_range`
+- **Description**: Fills in missing weekly dates in a DataFrame with a specified frequency.
+- **Usage**: `fill_weekly_date_range(df, date_column, freq='W-MON')`
+- **Example**: `fill_weekly_date_range(df, 'date_col')`
+
+## 28. `add_prefix_and_suffix`
+- **Description**: Adds prefixes and/or suffixes to column names, with an option to exclude a date column.
+- **Usage**: `add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)`
+- **Example**: `add_prefix_and_suffix(df, prefix='pre_', suffix='_suf', date_col='date_col')`
+
+## 29. `create_dummies`
+- **Description**: Creates dummy variables for columns, with an option to add a total dummy column.
+- **Usage**: `create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')`
+- **Example**: `create_dummies(df, date_col='date_col', dummy_threshold=1)`
+
+## 30. `replace_substrings`
+- **Description**: Replaces substrings in a column based on a dictionary, with options for case conversion and new column creation.
+- **Usage**: `replace_substrings(df, column, replacements, to_lower=False, new_column=None)`
+- **Example**: `replace_substrings(df, 'text_col', {'old': 'new'}, to_lower=True, new_column='updated_text')`
+
+## 31. `add_total_column`
+- **Description**: Adds a total column to a DataFrame by summing values across columns, optionally excluding one.
+- **Usage**: `add_total_column(df, exclude_col=None, total_col_name='Total')`
+- **Example**: `add_total_column(df, exclude_col='date_col')`
+
+## 32. `apply_lookup_table_based_on_substring`
+- **Description**: Categorises text in a column using a lookup table based on substrings.
+- **Usage**: `apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')`
+- **Example**: `apply_lookup_table_based_on_substring(df, 'text_col', {'sub1': 'cat1', 'sub2': 'cat2'})`
+
+## 33. `compare_overlap`
+- **Description**: Compares overlapping periods between two DataFrames and summarises differences.
+- **Usage**: `compare_overlap(df1, df2, date_col)`
+- **Example**: `compare_overlap(df1, df2, 'date_col')`
+
+## 34. `week_commencing_2_week_commencing_conversion_isoweekday`
+- **Description**: Maps dates to the start of the current ISO week based on a specified weekday.
+- **Usage**: `week_commencing_2_week_commencing_conversion_isoweekday(df, date_col, week_commencing='mon')`
+- **Example**: `week_commencing_2_week_commencing_conversion_isoweekday(df, 'date_col', week_commencing='fri')`
+
+---
+
+## Data Processing for Incrementality Testing
+
+## 1. `pull_ga`
+- **Description**: Pulls in GA4 data for geo experiments.
+- **Usage**: `pull_ga(credentials_file, property_id, start_date, country, metrics)`
+- **Example**: `pull_ga('GeoExperiment-31c5f5db2c39.json', '111111111', '2023-10-15', 'United Kingdom', ['totalUsers', 'newUsers'])`
+
+## 2. `process_itv_analysis`
+- **Description**: Processes geo-test data against ITV regional mappings, city coordinates, and media spend, splitting regions into two comparison groups for analysis.
+- **Usage**: `process_itv_analysis(raw_df, itv_path, cities_path, media_spend_path, output_path, group1, group2)`
+- **Example**: `process_itv_analysis(df, 'itv regional mapping.csv', 'Geo_Mappings_with_Coordinates.xlsx', 'IMS.xlsx', 'itv_for_test_analysis_itvx.csv', ['West', 'Westcountry', 'Tyne Tees'], ['Central Scotland', 'North Scotland'])`
+
+---
+
+## Data Visualisations
+
+## 1. `plot_one`
+- **Description**: Plots a specified column from a DataFrame with a white background and black axes.
+- **Usage**: `plot_one(df1, col1, date_column)`
+- **Example**: `plot_one(df, 'sales', 'date')`
+
+## 2. `plot_two`
+- **Description**: Plots specified columns from two DataFrames, optionally on the same or separate y-axes.
+- **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
+- **Example**: `plot_two(df1, 'sales', df2, 'revenue', 'date', same_axis=False)`
+
+## 3. `plot_chart`
+- **Description**: Plots various chart types using Plotly, including line, bar, scatter, area, pie, etc.
+- **Usage**: `plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values')`
+- **Example**: `plot_chart(df, 'date', ['sales', 'revenue'], chart_type='line', title='Sales and Revenue')`
+
+---
+
+## Data Pulling
+
+## 1. `pull_fred_data`
+- **Description**: Fetches data from FRED using series ID tokens.
+- **Usage**: `pull_fred_data(week_commencing, series_id_list)`
+- **Example**: `pull_fred_data('mon', ['GPDIC1', 'Y057RX1Q020SBEA', 'GCEC1', 'ND000333Q', 'Y006RX1Q020SBEA'])`
+
+## 2. `pull_boe_data`
+- **Description**: Fetches and processes Bank of England interest rate data.
+- **Usage**: `pull_boe_data(week_commencing)`
+- **Example**: `pull_boe_data('mon')`
+
+## 3. `pull_oecd`
+- **Description**: Fetches macroeconomic data from the OECD for a specified country.
+- **Usage**: `pull_oecd(country='GBR', week_commencing='mon', start_date='2020-01-01')`
+- **Example**: `pull_oecd('GBR', 'mon', '2000-01-01')`
+
+## 4. `get_google_mobility_data`
+- **Description**: Fetches Google Mobility data for the specified country.
+- **Usage**: `get_google_mobility_data(country, wc)`
+- **Example**: `get_google_mobility_data('United Kingdom', 'mon')`
+
+## 5. `pull_seasonality`
+- **Description**: Generates combined dummy variables for seasonality, trends, and COVID lockdowns.
+- **Usage**: `pull_seasonality(week_commencing, start_date, countries)`
+- **Example**: `pull_seasonality('mon', '2020-01-01', ['US', 'GB'])`
+
+## 6. `pull_weather`
+- **Description**: Fetches and processes historical weather data for the specified country.
+- **Usage**: `pull_weather(week_commencing, country)`
+- **Example**: `pull_weather('mon', 'GBR')`
+
+## 7. `pull_macro_ons_uk`
+- **Description**: Fetches and processes time series data from the Beta ONS API.
+- **Usage**: `pull_macro_ons_uk(additional_list, week_commencing, sector)`
+- **Example**: `pull_macro_ons_uk(['HBOI'], 'mon', 'fast_food')`
+
+## 8. `pull_yfinance`
+- **Description**: Fetches and processes time series data from Yahoo Finance.
+- **Usage**: `pull_yfinance(tickers, week_start_day)`
+- **Example**: `pull_yfinance(['^FTMC', '^IXIC'], 'mon')`
+
+---
+
+## Installation
+
+Install the IMS package via pip:
+
+```bash
+pip install imsciences
+```
+
+---
+
+## Usage
+
+```python
+from imsciences import *
+ims_proc = dataprocessing()
+ims_geo = geoprocessing()
+ims_pull = datapull()
+ims_vis = datavis()
+```
+
+---
+
+## License
+
+This project is licensed under the MIT License.
+
+---
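The Usage section above only instantiates the classes. A slightly fuller sketch of the documented API follows; the signatures are taken from the catalogue in this METADATA, but the daily data is illustrative and the shape of each return value is an assumption, since the README does not specify it.

```python
import pandas as pd
from imsciences import dataprocessing, datapull  # class names as given in the Usage section

ims_proc = dataprocessing()
ims_pull = datapull()

# Illustrative daily spend data; column names are hypothetical
daily = pd.DataFrame({
    "date": pd.date_range("2024-01-01", periods=28, freq="D"),
    "platform": ["meta", "tiktok"] * 14,
    "cost": [float(i) for i in range(28)],
})

# Aggregate daily rows into week-commencing-Monday sums (catalogue item 2;
# assumed to return a long-format weekly DataFrame)
weekly = ims_proc.aggregate_daily_to_wc_long(daily, "date", ["platform"], ["cost"], "mon")

# Fetch Bank of England rate data on the same weekly grain (Data Pulling item 2)
boe = ims_pull.pull_boe_data("mon")
```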
imsciences-0.9.3.dist-info/PKG-INFO-IMS-24Ltp-3
ADDED
@@ -0,0 +1,24 @@
+Metadata-Version: 2.1
+Name: imsciences
+Version: 0.6.1.1
+Summary: IMS Data Processing Package
+Author: IMS
+Author-email: cam@im-sciences.com
+Keywords: python,data processing
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: Unix
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Operating System :: Microsoft :: Windows
+Description-Content-Type: text/markdown
+Requires-Dist: pandas
+
+# IMS Package Documentation
+
+The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
+
+## Data Processing
+
+## Data Pulling
+
imsciences-0.9.3.dist-info/RECORD
ADDED
@@ -0,0 +1,22 @@
+dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
+dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
+dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
+imsciences/__init__.py,sha256=_HuYeLbDMTdt7GpKI4r6-d7yRPZgcAQ7yOW0-ydR2Yo,117
+imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
+imsciences/datafunctions.py,sha256=WZrXNLO-SYrCuFt0pAbha74psMOZPY7meWJ7yWEbRpk,169953
+imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
+imsciences/geo.py,sha256=bhtA8s4enWgT8oxyVP6inUgo_rvAEyZY-K6XnOizhkE,9530
+imsciences/mmm.py,sha256=ZV_mzUYbpLfcSYyGKNGyHRs5pKpGPtzB-zJxyq08ul4,73914
+imsciences/pull.py,sha256=bGz8B7bBQ5b9hrx3ipCFTWl_eebEb7rPL4dANKiVWTY,74015
+imsciences/unittesting.py,sha256=DYGqVCsZHrs_tZ-EXDW8q8CdlcsTnG8HsnmWjEE521c,45691
+imsciences/vis.py,sha256=2izdHQhmWEReerRqIxhY4Ai10VjL7xoUqyWyZC7-2XI,8931
+imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
+imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
+imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
+imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
+imsciences-0.9.3.dist-info/LICENSE.txt,sha256=lVq2QwcExPX4Kl2DHeEkRrikuItcDB1Pr7yF7FQ8_z8,1108
+imsciences-0.9.3.dist-info/METADATA,sha256=IVP2fsJXuR_ePn0VFozQ7b_-BwOf9tdPBElGnphNv4Y,16612
+imsciences-0.9.3.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
+imsciences-0.9.3.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
+imsciences-0.9.3.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.9.3.dist-info/RECORD,,