imsciences 0.6.2.5__py3-none-any.whl → 0.6.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -109,13 +109,13 @@ class dataprocessing:
109
109
 
110
110
  print("\n17. pivot_table")
111
111
  print(" - Description: Dynamically pivots a DataFrame based on specified columns.")
112
- print(" - Usage: pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'False')")
113
- print(" - Example: pivot_table(df, 'OBS', 'Channel Short Names', 'Value',filters_dict={'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'True')")
112
+ print(" - Usage: pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'False',fill_missing_weekly_dates=False,week_commencing='W-MON')")
113
+ print(" - Example: pivot_table(df, 'OBS', 'Channel Short Names', 'Value',filters_dict={'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'True',fill_missing_weekly_dates=True,week_commencing='W-MON')")
114
114
 
115
115
  print("\n18. apply_lookup_table_for_columns")
116
116
  print(" - Description: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.")
117
- print(" - Usage: classify_within_column(df, col_names, to_find_dict, if_not_in_country_dict='Other'), new_column_name='Mapping'")
118
- print(" - Example: classify_within_column(df, ['campaign type','media type'], {'France Paid Social FB|paid social': 'facebook','France Paid Social TW|paid social': 'twitter'}, 'other','mapping')")
117
+ print(" - Usage: apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')")
118
+ print(" - Example: apply_lookup_table_for_columns(df, col_names, {'spend':'spd','clicks':'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')")
119
119
 
120
120
  print("\n19. aggregate_daily_to_wc_wide")
121
121
  print(" - Description: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.")
@@ -226,6 +226,16 @@ class dataprocessing:
226
226
  print(" - Description: With two matching dataset, it takes the common columns and rows and takes the difference between them, outputing a differences and total differences table")
227
227
  print(" - Usage: compare_overlap(df1, df2, date_col)")
228
228
  print(" - Example: compare_overlap(df_1, df_2, 'obs')")
229
+
230
+ print("\n41. week_commencing_2_week_commencing_conversion")
231
+ print(" - Description: Take a week commencing column say sunday and creates a new column with a different week commencing e.g. monday")
232
+ print(" - Usage: week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun')")
233
+ print(" - Example: week_commencing_2_week_commencing_conversion(df,'obs',week_commencing='mon')")
234
+
235
+ print("\n42. week_commencing_2_week_commencing_conversion")
236
+ print(" - Description: Take a week commencing column say sunday and creates a new column with a different week commencing e.g. monday")
237
+ print(" - Usage: week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun')")
238
+ print(" - Example: week_commencing_2_week_commencing_conversion(df,'obs',week_commencing='mon')")
229
239
 
230
240
 
231
241
 
@@ -664,7 +674,7 @@ class dataprocessing:
664
674
 
665
675
  return combined_df
666
676
 
667
- def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name="Total", datetime_trans_needed=True, reverse_header_order=False):
677
+ def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name="Total", datetime_trans_needed=True, reverse_header_order=False,fill_missing_weekly_dates=False,week_commencing='W-MON'):
668
678
  """
669
679
  Provides the ability to create pivot tables, filtering the data to get to data you want and then pivoting on certain columns
670
680
 
@@ -680,6 +690,8 @@ class dataprocessing:
680
690
  margins_name (str, optional): The name of the Totals columns. Defaults to "Total".
681
691
  datetime_trans_needed (bool, optional): Whether the index column needs to be transformed into datetime format. Defaults to False.
682
692
  reverse_header_order (bool, optional): Reverses the order of the column headers. Defaults to False.
693
+ fill_missing_weekly_dates (bool, optional): Fills in any weekly missing dates. Defaults to False.
694
+ week_commencing (str,optional): Fills in missing weeks if option is specified. Defaults to 'W-MON'.
683
695
 
684
696
  Returns:
685
697
  pandas.DataFrame: The pivot table specified
@@ -730,6 +742,10 @@ class dataprocessing:
730
742
  # Fill in any NaNs
731
743
  pivoted_df = pivoted_df.fillna(fill_value)
732
744
 
745
+ # If there is a need to fill in missing weeks
746
+ if fill_missing_weekly_dates == True:
747
+ pivoted_df = self.fill_weekly_date_range(pivoted_df, index_col, freq=week_commencing)
748
+
733
749
  return pivoted_df
734
750
 
735
751
  def apply_lookup_table_for_columns(self, df, col_names, to_find_dict, if_not_in_dict="Other", new_column_name="Mapping"):
@@ -1482,7 +1498,7 @@ class dataprocessing:
1482
1498
  df[new_col_name] = df[column_name].apply(categorize_text)
1483
1499
  return df
1484
1500
 
1485
- def compare_overlap(self, df1, df2, date_col):
1501
+ def compare_overlap(df1, df2, date_col):
1486
1502
  """
1487
1503
  Compare overlapping periods between two DataFrames and provide a summary of total differences.
1488
1504
 
@@ -1509,21 +1525,23 @@ class dataprocessing:
1509
1525
  # Merge the dataframes on the date column to align data for comparison
1510
1526
  merged_df = pd.merge(df1_overlap, df2_overlap, on=date_col, suffixes=('_df1', '_df2'))
1511
1527
 
1528
+ # Get the common columns between the two DataFrames, excluding the date column
1529
+ common_cols = [col for col in df1.columns if col != date_col and col in df2.columns]
1530
+
1512
1531
  # Initialize a list to collect total differences for each column
1513
1532
  total_diff_list = []
1514
1533
 
1515
- # Compare the values in each column (excluding the date column)
1534
+ # Create a DataFrame for the differences
1516
1535
  diff_df = pd.DataFrame({date_col: merged_df[date_col]}) # Initialize diff_df with the date column
1517
1536
 
1518
- for col in df1.columns:
1519
- if col != date_col:
1520
- # Calculate the difference for each row
1521
- diff_col = f'diff_{col}'
1522
- diff_df[diff_col] = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']
1523
-
1524
- # Calculate the total difference for the column and add it to the list
1525
- total_diff = diff_df[diff_col].sum()
1526
- total_diff_list.append({'Column': col, 'Total Difference': total_diff})
1537
+ for col in common_cols:
1538
+ # Calculate the difference for each row
1539
+ diff_col = f'diff_{col}'
1540
+ diff_df[diff_col] = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']
1541
+
1542
+ # Calculate the total difference for the column and add it to the list
1543
+ total_diff = diff_df[diff_col].sum()
1544
+ total_diff_list.append({'Column': col, 'Total Difference': total_diff})
1527
1545
 
1528
1546
  # Create a DataFrame for the summary of total differences
1529
1547
  total_diff_df = pd.DataFrame(total_diff_list)
@@ -1535,6 +1553,162 @@ class dataprocessing:
1535
1553
 
1536
1554
  return diff_df, total_diff_df
1537
1555
 
1556
# Convert week commencing col (should be most likely monday to sunday or vice versa)
def week_commencing_2_week_commencing_conversion(self, df, date_col, week_commencing='sun'):
    """
    Convert a week-commencing date column to start on a different day of the week.

    Adds a new column named ``week_start_<week_commencing>`` holding each date
    rolled back to the most recent occurrence of the requested weekday.

    Args:
        df (pandas.DataFrame): The DataFrame containing the date-based data.
        date_col (str): The name of the date column in the DataFrame.
        week_commencing (str, optional): The day of the week the new week starts on
            ('mon', 'tue', 'wed', 'thur', 'fri', 'sat', 'sun'). Defaults to 'sun'.

    Returns:
        pandas.DataFrame: The same DataFrame (modified in place) with the additional
        week-start column.
    """
    # Map day abbreviations to pandas weekday numbers (Monday == 0).
    day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
    # Roll each date back ((weekday - target) % 7) days to land on the target weekday.
    df['week_start_' + week_commencing] = df[date_col].apply(
        lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
    )

    return df
1574
+
1575
def plot_chart(self, df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs):
    """
    Plot various types of charts using Plotly.

    Args:
        df (pandas.DataFrame): DataFrame containing the data.
        date_col (str): The name of the column with date information.
        value_cols (list): List of columns to plot.
        chart_type (str): Type of chart to plot ('line', 'bar', 'scatter', 'histogram',
            'pie', 'box', 'heatmap', 'area', 'bubble', 'funnel', 'waterfall', 'contour',
            'scatter3d').
        title (str): Title of the chart.
        x_title (str): Title of the x-axis.
        y_title (str): Title of the y-axis.
        **kwargs: Additional keyword arguments forwarded to each Plotly trace constructor.

    Returns:
        plotly.graph_objects.Figure: The Plotly figure object.

    Raises:
        ValueError: If an unsupported chart_type is supplied.
    """
    # Ensure the date column is in datetime format.
    # NOTE(review): this converts the column in place on the caller's DataFrame.
    df[date_col] = pd.to_datetime(df[date_col])

    # Initialize the figure
    fig = go.Figure()

    # Make sure the date col is excluded from the value cols
    value_cols = [x for x in value_cols if x != date_col]

    # Add each value column to the plot based on the chart type
    for col in value_cols:
        if chart_type == 'line':
            fig.add_trace(go.Scatter(
                x=df[date_col],
                y=df[col],
                mode='lines',
                name=col,
                **kwargs
            ))
        elif chart_type == 'bar':
            fig.add_trace(go.Bar(
                x=df[date_col],
                y=df[col],
                name=col,
                **kwargs
            ))
        elif chart_type == 'scatter':
            fig.add_trace(go.Scatter(
                x=df[date_col],
                y=df[col],
                mode='markers',
                name=col,
                **kwargs
            ))
        elif chart_type == 'histogram':
            fig.add_trace(go.Histogram(
                x=df[col],
                name=col,
                **kwargs
            ))
        elif chart_type == 'pie':
            fig.add_trace(go.Pie(
                labels=df[date_col],  # or another column for labels
                values=df[col],
                name=col,
                **kwargs
            ))
        elif chart_type == 'box':
            fig.add_trace(go.Box(
                y=df[col],
                name=col,
                **kwargs
            ))
        elif chart_type == 'heatmap':
            # assumes value_cols[0] is the category axis and value_cols[1] the values — TODO confirm
            fig.add_trace(go.Heatmap(
                z=df.pivot_table(index=date_col, columns=value_cols[0], values=value_cols[1]),
                x=df[value_cols[0]],
                y=df[date_col],
                **kwargs
            ))
        elif chart_type == 'area':
            fig.add_trace(go.Scatter(
                x=df[date_col],
                y=df[col],
                mode='lines',  # Use 'lines+markers' if you want markers
                fill='tozeroy',  # Fill the area under the line
                name=col,
                **kwargs
            ))
        elif chart_type == 'bubble':
            # value_cols[0]/[1] are x/y; value_cols[2] sizes the markers
            fig.add_trace(go.Scatter(
                x=df[value_cols[0]],
                y=df[value_cols[1]],
                mode='markers',
                marker=dict(size=df[value_cols[2]]),
                name='Bubble Chart',
                **kwargs
            ))
        elif chart_type == 'funnel':
            fig.add_trace(go.Funnel(
                y=df[date_col],
                x=df[col],
                **kwargs
            ))
        elif chart_type == 'waterfall':
            fig.add_trace(go.Waterfall(
                x=df[date_col],
                y=df[col],
                measure=df[value_cols[1]],  # measures like 'increase', 'decrease', 'total'
                **kwargs
            ))
        elif chart_type == 'contour':
            fig.add_trace(go.Contour(
                z=df.pivot_table(index=value_cols[0], columns=value_cols[1], values=value_cols[2]),
                x=df[value_cols[0]],
                y=df[value_cols[1]],
                **kwargs
            ))
        elif chart_type == 'scatter3d':
            fig.add_trace(go.Scatter3d(
                x=df[value_cols[0]],
                y=df[value_cols[1]],
                z=df[value_cols[2]],
                mode='markers',
                **kwargs
            ))
        else:
            raise ValueError(f"Unsupported chart type: {chart_type}")

    # Update the layout of the figure
    fig.update_layout(
        title=title,
        xaxis_title=x_title,
        yaxis_title=y_title,
        legend_title='Series',
        template='plotly_dark'
    )

    return fig
+
1538
1712
  ########################################################################################################################################
1539
1713
  ########################################################################################################################################
1540
1714
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.2.5
3
+ Version: 0.6.2.7
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -3,14 +3,14 @@ dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nF
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
4
  imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
5
5
  imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
6
- imsciences/datafunctions.py,sha256=WOdezS0IW_n91eWr9SL_czLjCCkl5-951n58sjCG51Q,143447
6
+ imsciences/datafunctions.py,sha256=PGuvgJIurXGWM8E1M_w9BijUJGBm5FTaZVE-C1_sPog,151382
7
7
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
8
8
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
9
9
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
10
10
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
11
11
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
12
- imsciences-0.6.2.5.dist-info/METADATA,sha256=HOqcQ7JFLITN-1vcra1XZTHQ72_JM_vajUfmpXNMyl4,854
13
- imsciences-0.6.2.5.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
14
- imsciences-0.6.2.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
- imsciences-0.6.2.5.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
16
- imsciences-0.6.2.5.dist-info/RECORD,,
12
+ imsciences-0.6.2.7.dist-info/METADATA,sha256=0IT7pWYxsHXerkqBVKsS2Zh1_6qwn1u7NL3mK44c4tk,854
13
+ imsciences-0.6.2.7.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
14
+ imsciences-0.6.2.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
+ imsciences-0.6.2.7.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
16
+ imsciences-0.6.2.7.dist-info/RECORD,,