PyPI - imsciences - Versions diffs - 0.9.4__py3-none-any.whl → 0.9.5.1__py3-none-any.whl - Mend

imsciences 0.9.4__py3-none-any.whl → 0.9.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of imsciences might be problematic. Click here for more details.

Files changed (7)

imsciences/geo.py CHANGED Viewed

@@ -30,6 +30,11 @@ class geoprocessing:
         print("   - Usage: process_itv_analysis(self, raw_df, itv_path, cities_path, media_spend_path, output_path, group1, group2)")
         print("   - Example:process_itv_analysis(df,'itv regional mapping.csv', 'Geo_Mappings_with_Coordinates.xlsx', 'IMS.xlsx', 'itv_for_test_analysis_itvx.csv', ['West', 'Westcountry', 'Tyne Tees'], ['Central Scotland', 'North Scotland'])")
+        print("\n3. process_city_analysis")
+        print("   - Description: Processes city-level data for geo experiments by grouping user metrics, merging with media spend data, and saving the result.")
+        print("   - Usage: process_city_analysis(raw_df, spend_df, output_path, group1, group2, response_column)")
+        print("   - Example:process_city_analysis(df, spend, output, ['Barnsley'], ['Aberdeen'], 'newUsers')")
     def pull_ga(self, credentials_file, property_id, start_date, country, metrics):
         """
         Pulls Google Analytics data using the BetaAnalyticsDataClient.
@@ -192,4 +197,79 @@ class geoprocessing:
         # Save the final output
         analysis_df.to_csv(output_path, index=False)
-        return analysis_df
+        return analysis_df
+    def process_city_analysis(self, raw_input_path, spend_input_path, output_path, group1, group2, response_column):
+        """
+        Process city analysis by grouping data, analyzing user metrics, and merging with spend data.
+        Parameters:
+            raw_input_path (str): Path to the raw input data file (CSV or XLSX) containing at least 'date', 'city', and the specified response column.
+            spend_input_path (str): Path to the media spend data file (CSV or XLSX) with 'date', 'geo', and 'cost' columns. Costs should be numeric.
+            output_path (str): Path to save the final output file (CSV or XLSX).
+            group1 (list): List of city regions for group 1.
+            group2 (list): List of city regions for group 2.
+            response_column (str): Column name to be used as the response metric.
+        Returns:
+            pd.DataFrame: Processed DataFrame.
+        """
+        import pandas as pd
+        import os
+        def read_file(file_path):
+            """Helper function to read CSV or XLSX files."""
+            ext = os.path.splitext(file_path)[1].lower()
+            if ext == '.csv':
+                return pd.read_csv(file_path)
+            elif ext in ['.xlsx', '.xls']:
+                return pd.read_excel(file_path)
+            else:
+                raise ValueError("Unsupported file type. Please use a CSV or XLSX file.")
+        def write_file(df, file_path):
+            """Helper function to write DataFrame to CSV or XLSX files."""
+            ext = os.path.splitext(file_path)[1].lower()
+            if ext == '.csv':
+                df.to_csv(file_path, index=False)
+            elif ext in ['.xlsx', '.xls']:
+                df.to_excel(file_path, index=False, engine='openpyxl')
+            else:
+                raise ValueError("Unsupported file type. Please use a CSV or XLSX file.")
+        # Read input files
+        raw_df = read_file(raw_input_path)
+        spend_df = read_file(spend_input_path)
+        # Ensure necessary columns are present
+        required_columns = {'date', 'city', response_column}
+        if not required_columns.issubset(raw_df.columns):
+            raise ValueError(f"Input DataFrame must contain the following columns: {required_columns}")
+        spend_required_columns = {'date', 'geo', 'cost'}
+        if not spend_required_columns.issubset(spend_df.columns):
+            raise ValueError(f"Spend DataFrame must contain the following columns: {spend_required_columns}")
+        # Convert cost column to numeric after stripping currency symbols and commas
+        spend_df['cost'] = spend_df['cost'].replace('[^\d.]', '', regex=True).astype(float)
+        # Rename and process input DataFrame
+        raw_df = raw_df.rename(columns={'city': 'geo', response_column: 'response'})
+        # Filter and group data
+        filtered_df = raw_df[raw_df['geo'].isin(group1 + group2)].copy()
+        grouped_df = filtered_df.groupby(['date', 'geo'], as_index=False).agg({'response': 'sum'})
+        assignment_map = {city: 1 for city in group1}
+        assignment_map.update({city: 2 for city in group2})
+        grouped_df['assignment'] = grouped_df['geo'].map(assignment_map)
+        # Merge with spend data
+        merged_df = pd.merge(grouped_df, spend_df, on=['date', 'geo'], how='left')
+        merged_df['cost'] = merged_df['cost'].fillna(0)
+        # Save the final output
+        write_file(merged_df, output_path)
+        return merged_df

{imsciences-0.9.4.dist-info → imsciences-0.9.5.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.9.4
+Version: 0.9.5.1
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com
@@ -236,6 +236,11 @@ Table of Contents
 - **Usage**: `process_itv_analysis(self, raw_df, itv_path, cities_path, media_spend_path, output_path, group1, group2)`
 - **Example**: `process_itv_analysis(df, 'itv regional mapping.csv', 'Geo_Mappings_with_Coordinates.xlsx', 'IMS.xlsx', 'itv_for_test_analysis_itvx.csv', ['West', 'Westcountry', 'Tyne Tees'], ['Central Scotland', 'North Scotland'])`
+## 3. `process_city_analysis`
+- **Description**: Processes city-level data for geo experiments by grouping user metrics, merging with media spend data, and saving the result.
+- **Usage**: `process_city_analysis(raw_df, spend_df, output_path, group1, group2, response_column)`
+- **Example**: `process_city_analysis(df, spend, output, ['Barnsley'], ['Aberdeen'], 'newUsers')`
 ---
 ## Data Visualisations

{imsciences-0.9.4.dist-info → imsciences-0.9.5.1.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ imsciences/__init__.py,sha256=_HuYeLbDMTdt7GpKI4r6-d7yRPZgcAQ7yOW0-ydR2Yo,117
 imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
 imsciences/datafunctions.py,sha256=WZrXNLO-SYrCuFt0pAbha74psMOZPY7meWJ7yWEbRpk,169953
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
-imsciences/geo.py,sha256=bhtA8s4enWgT8oxyVP6inUgo_rvAEyZY-K6XnOizhkE,9530
+imsciences/geo.py,sha256=kBFc4_DEyQDQlnGnCc09qcydoaW5Lium6kTSdz3li3c,13526
 imsciences/mmm.py,sha256=M91Qs_ijtY4EytTo1rruWCOVGnV7DMVaYUhNpP1NVNc,73920
 imsciences/pull.py,sha256=bGz8B7bBQ5b9hrx3ipCFTWl_eebEb7rPL4dANKiVWTY,74015
 imsciences/unittesting.py,sha256=DYGqVCsZHrs_tZ-EXDW8q8CdlcsTnG8HsnmWjEE521c,45691
@@ -14,9 +14,9 @@ imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_a
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.9.4.dist-info/LICENSE.txt,sha256=lVq2QwcExPX4Kl2DHeEkRrikuItcDB1Pr7yF7FQ8_z8,1108
-imsciences-0.9.4.dist-info/METADATA,sha256=KrKgJEhrswsrqBbCGkVu6l_k4B4SWW9DrOqS8mbF5IA,16612
-imsciences-0.9.4.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
-imsciences-0.9.4.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
-imsciences-0.9.4.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
-imsciences-0.9.4.dist-info/RECORD,,
+imsciences-0.9.5.1.dist-info/LICENSE.txt,sha256=lVq2QwcExPX4Kl2DHeEkRrikuItcDB1Pr7yF7FQ8_z8,1108
+imsciences-0.9.5.1.dist-info/METADATA,sha256=aTHbZ8Hr_JNbE_UQwbP5CSJtPM-BHNmHnRUn9wlbQXE,16994
+imsciences-0.9.5.1.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
+imsciences-0.9.5.1.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
+imsciences-0.9.5.1.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.9.5.1.dist-info/RECORD,,

{imsciences-0.9.4.dist-info → imsciences-0.9.5.1.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{imsciences-0.9.4.dist-info → imsciences-0.9.5.1.dist-info}/PKG-INFO-IMS-24Ltp-3 RENAMED Viewed

File without changes

{imsciences-0.9.4.dist-info → imsciences-0.9.5.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{imsciences-0.9.4.dist-info → imsciences-0.9.5.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

imsciences 0.9.4__py3-none-any.whl → 0.9.5.1__py3-none-any.whl

Potentially problematic release.

imsciences 0.9.4__py3-none-any.whl → 0.9.5.1__py3-none-any.whl