imsciences 0.9.5.1__tar.gz → 0.9.5.3__tar.gz
This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/PKG-INFO +1 -1
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences/geo.py +13 -11
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences/mmm.py +13 -8
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences.egg-info/PKG-INFO +1 -1
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/setup.py +1 -1
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/LICENSE.txt +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/README.md +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences/__init__.py +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences/pull.py +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences/unittesting.py +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences/vis.py +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences.egg-info/PKG-INFO-IMS-24Ltp-3 +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.9.5.1 → imsciences-0.9.5.3}/setup.cfg +0 -0
|
@@ -199,13 +199,13 @@ class geoprocessing:
|
|
|
199
199
|
|
|
200
200
|
return analysis_df
|
|
201
201
|
|
|
202
|
-
def process_city_analysis(self,
|
|
202
|
+
def process_city_analysis(self, raw_data, spend_data, output_path, group1, group2, response_column):
|
|
203
203
|
"""
|
|
204
204
|
Process city analysis by grouping data, analyzing user metrics, and merging with spend data.
|
|
205
205
|
|
|
206
206
|
Parameters:
|
|
207
|
-
|
|
208
|
-
|
|
207
|
+
raw_data (str or pd.DataFrame): Raw input data as a file path (CSV/XLSX) or DataFrame.
|
|
208
|
+
spend_data (str or pd.DataFrame): Spend data as a file path (CSV/XLSX) or DataFrame.
|
|
209
209
|
output_path (str): Path to save the final output file (CSV or XLSX).
|
|
210
210
|
group1 (list): List of city regions for group 1.
|
|
211
211
|
group2 (list): List of city regions for group 2.
|
|
@@ -217,13 +217,15 @@ class geoprocessing:
|
|
|
217
217
|
import pandas as pd
|
|
218
218
|
import os
|
|
219
219
|
|
|
220
|
-
def read_file(
|
|
221
|
-
"""Helper function to
|
|
222
|
-
|
|
220
|
+
def read_file(data):
|
|
221
|
+
"""Helper function to handle file paths or return DataFrame directly."""
|
|
222
|
+
if isinstance(data, pd.DataFrame):
|
|
223
|
+
return data
|
|
224
|
+
ext = os.path.splitext(data)[1].lower()
|
|
223
225
|
if ext == '.csv':
|
|
224
|
-
return pd.read_csv(
|
|
226
|
+
return pd.read_csv(data)
|
|
225
227
|
elif ext in ['.xlsx', '.xls']:
|
|
226
|
-
return pd.read_excel(
|
|
228
|
+
return pd.read_excel(data)
|
|
227
229
|
else:
|
|
228
230
|
raise ValueError("Unsupported file type. Please use a CSV or XLSX file.")
|
|
229
231
|
|
|
@@ -237,9 +239,9 @@ class geoprocessing:
|
|
|
237
239
|
else:
|
|
238
240
|
raise ValueError("Unsupported file type. Please use a CSV or XLSX file.")
|
|
239
241
|
|
|
240
|
-
# Read
|
|
241
|
-
raw_df = read_file(
|
|
242
|
-
spend_df = read_file(
|
|
242
|
+
# Read data
|
|
243
|
+
raw_df = read_file(raw_data)
|
|
244
|
+
spend_df = read_file(spend_data)
|
|
243
245
|
|
|
244
246
|
# Ensure necessary columns are present
|
|
245
247
|
required_columns = {'date', 'city', response_column}
|
|
@@ -492,15 +492,15 @@ class dataprocessing:
|
|
|
492
492
|
|
|
493
493
|
return combined_df
|
|
494
494
|
|
|
495
|
-
def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc="sum", margins=False, margins_name="Total", datetime_trans_needed=True, date_format="%Y-%m-%d", reverse_header_order=False, fill_missing_weekly_dates=
|
|
495
|
+
def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc="sum", margins=False, margins_name="Total", datetime_trans_needed=True, date_format="%Y-%m-%d", reverse_header_order=False, fill_missing_weekly_dates=True, week_commencing="W-MON"):
|
|
496
496
|
"""
|
|
497
497
|
Provides the ability to create pivot tables, filtering the data to get to data you want and then pivoting on certain columns
|
|
498
498
|
|
|
499
499
|
Args:
|
|
500
500
|
df (pandas.DataFrame): The DataFrame containing the data.
|
|
501
501
|
index_col (str): Name of Column for your pivot table to index on
|
|
502
|
-
columns (str): Name of
|
|
503
|
-
values_col (str): Name of Values
|
|
502
|
+
columns (str or list): Name of Column(s) for your pivot table. Can be a single column or a list of columns.
|
|
503
|
+
values_col (str or list): Name of Values Column(s) for your pivot table. Can be a single column or a list of columns.
|
|
504
504
|
filters_dict (dict, optional): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell. Defaults to None
|
|
505
505
|
fill_value (int, optional): The value to replace nan with. Defaults to 0.
|
|
506
506
|
aggfunc (str, optional): The method on which to aggregate the values column. Defaults to sum.
|
|
@@ -514,14 +514,19 @@ class dataprocessing:
|
|
|
514
514
|
Returns:
|
|
515
515
|
pandas.DataFrame: The pivot table specified
|
|
516
516
|
"""
|
|
517
|
-
|
|
518
517
|
# Validate inputs
|
|
519
518
|
if index_col not in df.columns:
|
|
520
519
|
raise ValueError(f"index_col '{index_col}' not found in DataFrame.")
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
520
|
+
|
|
521
|
+
columns = [columns] if isinstance(columns, str) else columns
|
|
522
|
+
for col in columns:
|
|
523
|
+
if col not in df.columns:
|
|
524
|
+
raise ValueError(f"columns '{col}' not found in DataFrame.")
|
|
525
|
+
|
|
526
|
+
values_col = [values_col] if isinstance(values_col, str) else values_col
|
|
527
|
+
for col in values_col:
|
|
528
|
+
if col not in df.columns:
|
|
529
|
+
raise ValueError(f"values_col '{col}' not found in DataFrame.")
|
|
525
530
|
|
|
526
531
|
# Apply filters if provided
|
|
527
532
|
if filters_dict:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|