imsciences 0.5.4.7__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
imsciences/vis.py ADDED
@@ -0,0 +1,196 @@
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objs as go
4
+
5
+ class datavis:
6
+
7
+ def help(self):
8
+ """
9
+ Displays a help menu listing all the available functions with their descriptions, usage, and examples.
10
+ """
11
+ print("1. plot_one")
12
+ print(" - Description: Plots a specified column from a DataFrame with white background and black axes.")
13
+ print(" - Usage: plot_one(df1, col1, date_column)")
14
+ print(" - Example: plot_one(df, 'sales', 'date')\n")
15
+
16
+ print("2. plot_two")
17
+ print(" - Description: Plots specified columns from two DataFrames, optionally on the same or separate y-axes.")
18
+ print(" - Usage: plot_two(df1, col1, df2, col2, date_column, same_axis=True)")
19
+ print(" - Example: plot_two(df1, 'sales_vol', df2, 'sales_revenue', 'date', same_axis=False)\n")
20
+
21
+ print("3. plot_chart")
22
+ print(" - Description: Plots various chart types using Plotly, including line, bar, scatter, area, pie, etc.")
23
+ print(" - Usage: plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values')")
24
+ print(" - Example: plot_chart(df, 'date', ['sales', 'revenue'], chart_type='line', title='Sales and Revenue')\n")
25
+
26
+ def plot_one(self, df1, col1, date_column):
27
+ """
28
+ Plots specified column from a DataFrame with white background and black axes,
29
+ using a specified date column as the X-axis.
30
+
31
+ :param df1: DataFrame
32
+ :param col1: Column name from the DataFrame
33
+ :param date_column: The name of the date column to use for the X-axis
34
+ """
35
+ # Check if columns exist in the DataFrame
36
+ if col1 not in df1.columns or date_column not in df1.columns:
37
+ raise ValueError("Column not found in DataFrame")
38
+
39
+ # Check if the date column is in datetime format, if not convert it
40
+ if not pd.api.types.is_datetime64_any_dtype(df1[date_column]):
41
+ try:
42
+ # Convert with dayfirst=True to interpret dates correctly
43
+ df1[date_column] = pd.to_datetime(df1[date_column], dayfirst=True)
44
+ except Exception as e:
45
+ raise ValueError(f"Error converting {date_column} to datetime: {e}")
46
+
47
+ # Plotting using Plotly Express
48
+ fig = px.line(df1, x=date_column, y=col1)
49
+
50
+ # Update layout for white background and black axes lines, and setting y-axis to start at 0
51
+ fig.update_layout(
52
+ plot_bgcolor='white',
53
+ xaxis=dict(
54
+ showline=True,
55
+ linecolor='black'
56
+ ),
57
+ yaxis=dict(
58
+ showline=True,
59
+ linecolor='black',
60
+ rangemode='tozero' # Setting Y-axis to start at 0 if suitable
61
+ )
62
+ )
63
+
64
+ return fig
65
+
66
+ def plot_two(self, df1, col1, df2, col2, date_column, same_axis=True):
67
+ """
68
+ Plots specified columns from two different DataFrames with both different and the same lengths,
69
+ using a specified date column as the X-axis, and charting on either the same or separate y-axes.
70
+
71
+ :param df1: First DataFrame
72
+ :param col1: Column name from the first DataFrame
73
+ :param df2: Second DataFrame
74
+ :param col2: Column name from the second DataFrame
75
+ :param date_column: The name of the date column to use for the X-axis
76
+ :param same_axis: If True, plot both traces on the same y-axis; otherwise, use separate y-axes.
77
+ :return: Plotly figure
78
+ """
79
+ # Validate inputs
80
+ if col1 not in df1.columns or date_column not in df1.columns:
81
+ raise ValueError(f"Column {col1} or {date_column} not found in the first DataFrame.")
82
+ if col2 not in df2.columns or date_column not in df2.columns:
83
+ raise ValueError(f"Column {col2} or {date_column} not found in the second DataFrame.")
84
+
85
+ # Ensure date columns are in datetime format
86
+ df1[date_column] = pd.to_datetime(df1[date_column], errors='coerce')
87
+ df2[date_column] = pd.to_datetime(df2[date_column], errors='coerce')
88
+
89
+ # Drop rows with invalid dates
90
+ df1 = df1.dropna(subset=[date_column])
91
+ df2 = df2.dropna(subset=[date_column])
92
+
93
+ # Create traces for the first and second DataFrames
94
+ trace1 = go.Scatter(x=df1[date_column], y=df1[col1], mode='lines', name=col1, yaxis='y1')
95
+
96
+ if same_axis:
97
+ trace2 = go.Scatter(x=df2[date_column], y=df2[col2], mode='lines', name=col2, yaxis='y1')
98
+ else:
99
+ trace2 = go.Scatter(x=df2[date_column], y=df2[col2], mode='lines', name=col2, yaxis='y2')
100
+
101
+ # Define layout for the plot
102
+ layout = go.Layout(
103
+ title="Comparison Plot",
104
+ xaxis=dict(title=date_column, showline=True, linecolor='black'),
105
+ yaxis=dict(
106
+ title=col1 if same_axis else f"{col1} (y1)",
107
+ showline=True,
108
+ linecolor='black',
109
+ rangemode='tozero'
110
+ ),
111
+ yaxis2=dict(
112
+ title=f"{col2} (y2)" if not same_axis else "",
113
+ overlaying='y',
114
+ side='right',
115
+ showline=True,
116
+ linecolor='black',
117
+ rangemode='tozero'
118
+ ),
119
+ showlegend=True,
120
+ plot_bgcolor='white' # Set the plot background color to white
121
+ )
122
+
123
+ # Create the figure with the defined layout and traces
124
+ fig = go.Figure(data=[trace1, trace2], layout=layout)
125
+
126
+ return fig
127
+
128
+ def plot_chart(self, df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs):
129
+ """
130
+ Plot various types of charts using Plotly.
131
+
132
+ Args:
133
+ df (pandas.DataFrame): DataFrame containing the data.
134
+ date_col (str): The name of the column with date information.
135
+ value_cols (list): List of columns to plot.
136
+ chart_type (str): Type of chart to plot ('line', 'bar', 'scatter', etc.).
137
+ title (str): Title of the chart.
138
+ x_title (str): Title of the x-axis.
139
+ y_title (str): Title of the y-axis.
140
+ **kwargs: Additional keyword arguments for customization.
141
+
142
+ Returns:
143
+ plotly.graph_objects.Figure: The Plotly figure object.
144
+ """
145
+ import pandas as pd
146
+ import plotly.graph_objects as go
147
+
148
+ # Ensure the date column is in datetime format
149
+ df[date_col] = pd.to_datetime(df[date_col])
150
+
151
+ # Validate input columns
152
+ value_cols = [col for col in value_cols if col in df.columns and col != date_col]
153
+ if not value_cols:
154
+ raise ValueError("No valid columns provided for plotting.")
155
+
156
+ # Initialize the figure
157
+ fig = go.Figure()
158
+
159
+ # Define a mapping for chart types to corresponding Plotly trace types
160
+ chart_trace_map = {
161
+ 'line': lambda col: go.Scatter(x=df[date_col], y=df[col], mode='lines', name=col, **kwargs),
162
+ 'bar': lambda col: go.Bar(x=df[date_col], y=df[col], name=col, **kwargs),
163
+ 'scatter': lambda col: go.Scatter(x=df[date_col], y=df[col], mode='markers', name=col, **kwargs),
164
+ 'area': lambda col: go.Scatter(x=df[date_col], y=df[col], mode='lines', fill='tozeroy', name=col, **kwargs),
165
+ 'pie': lambda col: go.Pie(labels=df[date_col], values=df[col], name=col, **kwargs),
166
+ 'box': lambda col: go.Box(y=df[col], name=col, **kwargs),
167
+ 'bubble': lambda _: go.Scatter(
168
+ x=df[value_cols[0]], y=df[value_cols[1]], mode='markers',
169
+ marker=dict(size=df[value_cols[2]]), name='Bubble Chart', **kwargs
170
+ ),
171
+ 'funnel': lambda col: go.Funnel(y=df[date_col], x=df[col], **kwargs),
172
+ 'waterfall': lambda col: go.Waterfall(x=df[date_col], y=df[col], measure=df[value_cols[1]], **kwargs),
173
+ 'scatter3d': lambda _: go.Scatter3d(
174
+ x=df[value_cols[0]], y=df[value_cols[1]], z=df[value_cols[2]],
175
+ mode='markers', **kwargs
176
+ )
177
+ }
178
+
179
+ # Generate traces for the selected chart type
180
+ if chart_type in chart_trace_map:
181
+ for col in value_cols:
182
+ trace = chart_trace_map[chart_type](col)
183
+ fig.add_trace(trace)
184
+ else:
185
+ raise ValueError(f"Unsupported chart type: {chart_type}")
186
+
187
+ # Update the layout of the figure
188
+ fig.update_layout(
189
+ title=title,
190
+ xaxis_title=x_title,
191
+ yaxis_title=y_title,
192
+ legend_title='Series',
193
+ template='plotly_dark'
194
+ )
195
+
196
+ return fig
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Independent Marketing Sciences
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,330 @@
1
+ Metadata-Version: 2.1
2
+ Name: imsciences
3
+ Version: 0.9.3
4
+ Summary: IMS Data Processing Package
5
+ Author: IMS
6
+ Author-email: cam@im-sciences.com
7
+ Keywords: python,data processing,apis
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE.txt
16
+ Requires-Dist: pandas
17
+ Requires-Dist: plotly
18
+ Requires-Dist: numpy
19
+ Requires-Dist: fredapi
20
+ Requires-Dist: bs4
21
+ Requires-Dist: yfinance
22
+ Requires-Dist: holidays
23
+ Requires-Dist: google-analytics-data
24
+ Requires-Dist: geopandas
25
+
26
+ # IMS Package Documentation
27
+
28
+ The **Independent Marketing Sciences** package is a Python library designed to process incoming data into a format tailored for projects, particularly those utilising weekly time series data. This package offers a suite of functions for efficient data collection, manipulation, visualisation and analysis.
29
+
30
+ ---
31
+
32
+ ## Key Features
33
+ - Seamless data processing for time series workflows.
34
+ - Aggregation, filtering, and transformation of time series data.
35
+ - Visualising Data
36
+ - Integration with external data sources like FRED, Bank of England, ONS and OECD.
37
+
38
+ ---
39
+
40
+ Table of Contents
41
+ =================
42
+
43
+ 1. [Data Processing for Time Series](#data-processing-for-time-series)
44
+ 2. [Data Processing for Incrementality Testing](#data-processing-for-incrementality-testing)
45
+ 3. [Data Visualisations](#data-visualisations)
46
+ 4. [Data Pulling](#data-pulling)
47
+ 5. [Installation](#installation)
48
+ 6. [Usage](#usage)
49
+ 7. [License](#license)
50
+
51
+ ---
52
+
53
+ ## Data Processing for Time Series
54
+
55
+ ## 1. `get_wd_levels`
56
+ - **Description**: Get the working directory with the option of moving up parents.
57
+ - **Usage**: `get_wd_levels(levels)`
58
+ - **Example**: `get_wd_levels(0)`
59
+
60
+ ## 2. `aggregate_daily_to_wc_long`
61
+ - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
62
+ - **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum')`
63
+ - **Example**: `aggregate_daily_to_wc_long(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average')`
64
+
65
+ ## 3. `convert_monthly_to_daily`
66
+ - **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
67
+ - **Usage**: `convert_monthly_to_daily(df, date_column, divide=True)`
68
+ - **Example**: `convert_monthly_to_daily(df, 'date')`
69
+
70
+ ## 4. `week_of_year_mapping`
71
+ - **Description**: Converts a week column in 'yyyy-Www' or 'yyyy-ww' format to week commencing date.
72
+ - **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
73
+ - **Example**: `week_of_year_mapping(df, 'week', 'mon')`
74
+
75
+ ## 5. `rename_cols`
76
+ - **Description**: Renames columns in a pandas DataFrame with a specified prefix or format.
77
+ - **Usage**: `rename_cols(df, name='ame_')`
78
+ - **Example**: `rename_cols(df, 'ame_facebook')`
79
+
80
+ ## 6. `merge_new_and_old`
81
+ - **Description**: Creates a new DataFrame by merging old and new dataframes based on a cutoff date.
82
+ - **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
83
+ - **Example**: `merge_new_and_old(df1, 'old_col', df2, 'new_col', '2023-01-15')`
84
+
85
+ ## 7. `merge_dataframes_on_column`
86
+ - **Description**: Merge a list of DataFrames on a common column.
87
+ - **Usage**: `merge_dataframes_on_column(dataframes, common_column='OBS', merge_how='outer')`
88
+ - **Example**: `merge_dataframes_on_column([df1, df2, df3], common_column='OBS', merge_how='outer')`
89
+
90
+ ## 8. `merge_and_update_dfs`
91
+ - **Description**: Merges two dataframes, updating columns from the second dataframe where values are available.
92
+ - **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
93
+ - **Example**: `merge_and_update_dfs(processed_facebook, finalised_meta, 'OBS')`
94
+
95
+ ## 9. `convert_us_to_uk_dates`
96
+ - **Description**: Convert a DataFrame column with mixed US and UK date formats to datetime.
97
+ - **Usage**: `convert_us_to_uk_dates(df, date_col)`
98
+ - **Example**: `convert_us_to_uk_dates(df, 'date')`
99
+
100
+ ## 10. `combine_sheets`
101
+ - **Description**: Combines multiple DataFrames from a dictionary into a single DataFrame.
102
+ - **Usage**: `combine_sheets(all_sheets)`
103
+ - **Example**: `combine_sheets({'Sheet1': df1, 'Sheet2': df2})`
104
+
105
+ ## 11. `pivot_table`
106
+ - **Description**: Dynamically pivots a DataFrame based on specified columns.
107
+ - **Usage**: `pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=False, fill_missing_weekly_dates=False, week_commencing='W-MON')`
108
+ - **Example**: `pivot_table(df, 'OBS', 'Channel Short Names', 'Value', filters_dict={'Master Include': ' == 1'}, fill_value=0)`
109
+
110
+ ## 12. `apply_lookup_table_for_columns`
111
+ - **Description**: Maps substrings in columns to new values based on a dictionary.
112
+ - **Usage**: `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')`
113
+ - **Example**: `apply_lookup_table_for_columns(df, col_names, {'spend': 'spd'}, if_not_in_dict='Other', new_column_name='Metrics Short')`
114
+
115
+ ## 13. `aggregate_daily_to_wc_wide`
116
+ - **Description**: Aggregates daily data into weekly data and pivots it to wide format.
117
+ - **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc='sun', aggregation='sum', include_totals=False)`
118
+ - **Example**: `aggregate_daily_to_wc_wide(df, 'date', ['platform'], ['cost', 'impressions'], 'mon', 'average', True)`
119
+
120
+ ## 14. `merge_cols_with_seperator`
121
+ - **Description**: Merges multiple columns in a DataFrame into one column with a specified separator.
122
+ - **Usage**: `merge_cols_with_seperator(df, col_names, separator='_', output_column_name='Merged')`
123
+ - **Example**: `merge_cols_with_seperator(df, ['Campaign', 'Product'], separator='|', output_column_name='Merged Columns')`
124
+
125
+ ## 15. `check_sum_of_df_cols_are_equal`
126
+ - **Description**: Checks if the sum of two columns in two DataFrames are equal and provides the difference.
127
+ - **Usage**: `check_sum_of_df_cols_are_equal(df_1, df_2, cols_1, cols_2)`
128
+ - **Example**: `check_sum_of_df_cols_are_equal(df_1, df_2, 'Media Cost', 'Spend')`
129
+
130
+ ## 16. `convert_2_df_cols_to_dict`
131
+ - **Description**: Creates a dictionary from two DataFrame columns.
132
+ - **Usage**: `convert_2_df_cols_to_dict(df, key_col, value_col)`
133
+ - **Example**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`
134
+
135
+ ## 17. `create_FY_and_H_columns`
136
+ - **Description**: Adds financial year and half-year columns to a DataFrame based on a start date.
137
+ - **Usage**: `create_FY_and_H_columns(df, index_col, start_date, starting_FY, short_format='No', half_years='No', combined_FY_and_H='No')`
138
+ - **Example**: `create_FY_and_H_columns(df, 'Week', '2022-10-03', 'FY2023', short_format='Yes')`
139
+
140
+ ## 18. `keyword_lookup_replacement`
141
+ - **Description**: Updates values in a column based on a lookup dictionary with conditional logic.
142
+ - **Usage**: `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict, output_column_name='Updated Column')`
143
+ - **Example**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel', 'segment'], lookup_dict, output_column_name='Channel New')`
144
+
145
+ ## 19. `create_new_version_of_col_using_LUT`
146
+ - **Description**: Creates a new column based on a lookup table applied to an existing column.
147
+ - **Usage**: `create_new_version_of_col_using_LUT(df, keys_col, value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
148
+ - **Example**: `create_new_version_of_col_using_LUT(df, 'Campaign Name', 'Campaign Type', lookup_dict)`
149
+
150
+ ## 20. `convert_df_wide_2_long`
151
+ - **Description**: Converts a wide-format DataFrame into a long-format DataFrame.
152
+ - **Usage**: `convert_df_wide_2_long(df, value_cols, variable_col_name='Stacked', value_col_name='Value')`
153
+ - **Example**: `convert_df_wide_2_long(df, ['col1', 'col2'], variable_col_name='Var', value_col_name='Val')`
154
+
155
+ ## 21. `manually_edit_data`
156
+ - **Description**: Manually updates specified cells in a DataFrame based on filters.
157
+ - **Usage**: `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
158
+ - **Example**: `manually_edit_data(df, {'col1': '== 1'}, 'col2', 'new_val', add_notes='Yes', note='Manual Update')`
159
+
160
+ ## 22. `format_numbers_with_commas`
161
+ - **Description**: Formats numerical columns with commas and a specified number of decimal places.
162
+ - **Usage**: `format_numbers_with_commas(df, decimal_length_chosen=2)`
163
+ - **Example**: `format_numbers_with_commas(df, decimal_length_chosen=1)`
164
+
165
+ ## 23. `filter_df_on_multiple_conditions`
166
+ - **Description**: Filters a DataFrame based on multiple column conditions.
167
+ - **Usage**: `filter_df_on_multiple_conditions(df, filters_dict)`
168
+ - **Example**: `filter_df_on_multiple_conditions(df, {'col1': '>= 5', 'col2': "== 'val'"})`
169
+
170
+ ## 24. `read_and_concatenate_files`
171
+ - **Description**: Reads and concatenates files from a specified folder into a single DataFrame.
172
+ - **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
173
+ - **Example**: `read_and_concatenate_files('/path/to/files', file_type='xlsx')`
174
+
175
+ ## 25. `upgrade_outdated_packages`
176
+ - **Description**: Upgrades all outdated Python packages except specified ones.
177
+ - **Usage**: `upgrade_outdated_packages(exclude_packages=['twine'])`
178
+ - **Example**: `upgrade_outdated_packages(exclude_packages=['pip', 'setuptools'])`
179
+
180
+ ## 26. `convert_mixed_formats_dates`
181
+ - **Description**: Converts mixed-format date columns into standardized datetime format.
182
+ - **Usage**: `convert_mixed_formats_dates(df, column_name)`
183
+ - **Example**: `convert_mixed_formats_dates(df, 'date_col')`
184
+
185
+ ## 27. `fill_weekly_date_range`
186
+ - **Description**: Fills in missing weekly dates in a DataFrame with a specified frequency.
187
+ - **Usage**: `fill_weekly_date_range(df, date_column, freq='W-MON')`
188
+ - **Example**: `fill_weekly_date_range(df, 'date_col')`
189
+
190
+ ## 28. `add_prefix_and_suffix`
191
+ - **Description**: Adds prefixes and/or suffixes to column names, with an option to exclude a date column.
192
+ - **Usage**: `add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)`
193
+ - **Example**: `add_prefix_and_suffix(df, prefix='pre_', suffix='_suf', date_col='date_col')`
194
+
195
+ ## 29. `create_dummies`
196
+ - **Description**: Creates dummy variables for columns, with an option to add a total dummy column.
197
+ - **Usage**: `create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')`
198
+ - **Example**: `create_dummies(df, date_col='date_col', dummy_threshold=1)`
199
+
200
+ ## 30. `replace_substrings`
201
+ - **Description**: Replaces substrings in a column based on a dictionary, with options for case conversion and new column creation.
202
+ - **Usage**: `replace_substrings(df, column, replacements, to_lower=False, new_column=None)`
203
+ - **Example**: `replace_substrings(df, 'text_col', {'old': 'new'}, to_lower=True, new_column='updated_text')`
204
+
205
+ ## 31. `add_total_column`
206
+ - **Description**: Adds a total column to a DataFrame by summing values across columns, optionally excluding one.
207
+ - **Usage**: `add_total_column(df, exclude_col=None, total_col_name='Total')`
208
+ - **Example**: `add_total_column(df, exclude_col='date_col')`
209
+
210
+ ## 32. `apply_lookup_table_based_on_substring`
211
+ - **Description**: Categorizes text in a column using a lookup table based on substrings.
212
+ - **Usage**: `apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')`
213
+ - **Example**: `apply_lookup_table_based_on_substring(df, 'text_col', {'sub1': 'cat1', 'sub2': 'cat2'})`
214
+
215
+ ## 33. `compare_overlap`
216
+ - **Description**: Compares overlapping periods between two DataFrames and summarizes differences.
217
+ - **Usage**: `compare_overlap(df1, df2, date_col)`
218
+ - **Example**: `compare_overlap(df1, df2, 'date_col')`
219
+
220
+ ## 34. `week_commencing_2_week_commencing_conversion_isoweekday`
221
+ - **Description**: Maps dates to the start of the current ISO week based on a specified weekday.
222
+ - **Usage**: `week_commencing_2_week_commencing_conversion_isoweekday(df, date_col, week_commencing='mon')`
223
+ - **Example**: `week_commencing_2_week_commencing_conversion_isoweekday(df, 'date_col', week_commencing='fri')`
224
+
225
+ ---
226
+
227
+ ## Data Processing for Incrementality Testing
228
+
229
+ ## 1. `pull_ga`
230
+ - **Description**: Pull in GA4 data for geo experiments.
231
+ - **Usage**: `pull_ga(credentials_file, property_id, start_date, country, metrics)`
232
+ - **Example**: `pull_ga('GeoExperiment-31c5f5db2c39.json', '111111111', '2023-10-15', 'United Kingdom', ['totalUsers', 'newUsers'])`
233
+
234
+ ## 2. `process_itv_analysis`
235
+ - **Description**: Pull in GA4 data for geo experiments.
236
+ - **Usage**: `process_itv_analysis(self, raw_df, itv_path, cities_path, media_spend_path, output_path, group1, group2)`
237
+ - **Example**: `process_itv_analysis(df, 'itv regional mapping.csv', 'Geo_Mappings_with_Coordinates.xlsx', 'IMS.xlsx', 'itv_for_test_analysis_itvx.csv', ['West', 'Westcountry', 'Tyne Tees'], ['Central Scotland', 'North Scotland'])`
238
+
239
+ ---
240
+
241
+ ## Data Visualisations
242
+
243
+ ## 1. `plot_one`
244
+ - **Description**: Plots a specified column from a DataFrame with white background and black axes.
245
+ - **Usage**: `plot_one(df1, col1, date_column)`
246
+ - **Example**: `plot_one(df, 'sales', 'date')`
247
+
248
+ ## 2. `plot_two`
249
+ - **Description**: Plots specified columns from two DataFrames, optionally on the same or separate y-axes.
250
+ - **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
251
+ - **Example**: `plot_two(df1, 'sales', df2, 'revenue', 'date', same_axis=False)`
252
+
253
+ ## 3. `plot_chart`
254
+ - **Description**: Plots various chart types using Plotly, including line, bar, scatter, area, pie, etc.
255
+ - **Usage**: `plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values')`
256
+ - **Example**: `plot_chart(df, 'date', ['sales', 'revenue'], chart_type='line', title='Sales and Revenue')`
257
+
258
+ ---
259
+
260
+ ## Data Pulling
261
+
262
+ ## 1. `pull_fred_data`
263
+ - **Description**: Fetch data from FRED using series ID tokens.
264
+ - **Usage**: `pull_fred_data(week_commencing, series_id_list)`
265
+ - **Example**: `pull_fred_data('mon', ['GPDIC1', 'Y057RX1Q020SBEA', 'GCEC1', 'ND000333Q', 'Y006RX1Q020SBEA'])`
266
+
267
+ ## 2. `pull_boe_data`
268
+ - **Description**: Fetch and process Bank of England interest rate data.
269
+ - **Usage**: `pull_boe_data(week_commencing)`
270
+ - **Example**: `pull_boe_data('mon')`
271
+
272
+ ## 3. `pull_oecd`
273
+ - **Description**: Fetch macroeconomic data from OECD for a specified country.
274
+ - **Usage**: `pull_oecd(country='GBR', week_commencing='mon', start_date='2020-01-01')`
275
+ - **Example**: `pull_oecd('GBR', 'mon', '2000-01-01')`
276
+
277
+ ## 4. `get_google_mobility_data`
278
+ - **Description**: Fetch Google Mobility data for the specified country.
279
+ - **Usage**: `get_google_mobility_data(country, wc)`
280
+ - **Example**: `get_google_mobility_data('United Kingdom', 'mon')`
281
+
282
+ ## 5. `pull_seasonality`
283
+ - **Description**: Generate combined dummy variables for seasonality, trends, and COVID lockdowns.
284
+ - **Usage**: `pull_seasonality(week_commencing, start_date, countries)`
285
+ - **Example**: `pull_seasonality('mon', '2020-01-01', ['US', 'GB'])`
286
+
287
+ ## 6. `pull_weather`
288
+ - **Description**: Fetch and process historical weather data for the specified country.
289
+ - **Usage**: `pull_weather(week_commencing, country)`
290
+ - **Example**: `pull_weather('mon', 'GBR')`
291
+
292
+ ## 7. `pull_macro_ons_uk`
293
+ - **Description**: Fetch and process time series data from the Beta ONS API.
294
+ - **Usage**: `pull_macro_ons_uk(additional_list, week_commencing, sector)`
295
+ - **Example**: `pull_macro_ons_uk(['HBOI'], 'mon', 'fast_food')`
296
+
297
+ ## 8. `pull_yfinance`
298
+ - **Description**: Fetch and process time series data from Yahoo Finance.
299
+ - **Usage**: `pull_yfinance(tickers, week_start_day)`
300
+ - **Example**: `pull_yfinance(['^FTMC', '^IXIC'], 'mon')`
301
+
302
+ ---
303
+
304
+ ## Installation
305
+
306
+ Install the IMS package via pip:
307
+
308
+ ```bash
309
+ pip install imsciences
310
+ ```
311
+
312
+ ---
313
+
314
+ ## Usage
315
+
316
+ ```python
317
+ from imsciences import *
318
+ ims_proc = dataprocessing()
319
+ ims_geo = geoprocessing()
320
+ ims_pull = datapull()
321
+ ims_vis = datavis()
322
+ ```
323
+
324
+ ---
325
+
326
+ ## License
327
+
328
+ This project is licensed under the MIT License. ![License](https://img.shields.io/badge/license-MIT-blue.svg)
329
+
330
+ ---
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.1
2
+ Name: imsciences
3
+ Version: 0.6.1.1
4
+ Summary: IMS Data Processing Package
5
+ Author: IMS
6
+ Author-email: cam@im-sciences.com
7
+ Keywords: python,data processing
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: pandas
16
+
17
+ # IMS Package Documentation
18
+
19
+ The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
20
+
21
+ ## Data Processing
22
+
23
+ ## Data Pulling
24
+
@@ -0,0 +1,22 @@
1
+ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
2
+ dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
3
+ dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
+ imsciences/__init__.py,sha256=_HuYeLbDMTdt7GpKI4r6-d7yRPZgcAQ7yOW0-ydR2Yo,117
5
+ imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
6
+ imsciences/datafunctions.py,sha256=WZrXNLO-SYrCuFt0pAbha74psMOZPY7meWJ7yWEbRpk,169953
7
+ imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
8
+ imsciences/geo.py,sha256=bhtA8s4enWgT8oxyVP6inUgo_rvAEyZY-K6XnOizhkE,9530
9
+ imsciences/mmm.py,sha256=ZV_mzUYbpLfcSYyGKNGyHRs5pKpGPtzB-zJxyq08ul4,73914
10
+ imsciences/pull.py,sha256=bGz8B7bBQ5b9hrx3ipCFTWl_eebEb7rPL4dANKiVWTY,74015
11
+ imsciences/unittesting.py,sha256=DYGqVCsZHrs_tZ-EXDW8q8CdlcsTnG8HsnmWjEE521c,45691
12
+ imsciences/vis.py,sha256=2izdHQhmWEReerRqIxhY4Ai10VjL7xoUqyWyZC7-2XI,8931
13
+ imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
14
+ imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
15
+ imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
16
+ imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
17
+ imsciences-0.9.3.dist-info/LICENSE.txt,sha256=lVq2QwcExPX4Kl2DHeEkRrikuItcDB1Pr7yF7FQ8_z8,1108
18
+ imsciences-0.9.3.dist-info/METADATA,sha256=IVP2fsJXuR_ePn0VFozQ7b_-BwOf9tdPBElGnphNv4Y,16612
19
+ imsciences-0.9.3.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
20
+ imsciences-0.9.3.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
21
+ imsciences-0.9.3.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
22
+ imsciences-0.9.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.40.0)
2
+ Generator: setuptools (74.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5