imsciences 0.6.2.6__py3-none-any.whl → 0.6.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -114,8 +114,8 @@ class dataprocessing:
114
114
 
115
115
  print("\n18. apply_lookup_table_for_columns")
116
116
  print(" - Description: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.")
117
- print(" - Usage: classify_within_column(df, col_names, to_find_dict, if_not_in_country_dict='Other'), new_column_name='Mapping'")
118
- print(" - Example: classify_within_column(df, ['campaign type','media type'], {'France Paid Social FB|paid social': 'facebook','France Paid Social TW|paid social': 'twitter'}, 'other','mapping')")
117
+ print(" - Usage: apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')")
118
+ print(" - Example: apply_lookup_table_for_columns(df, col_names, {'spend':'spd','clicks':'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')")
119
119
 
120
120
  print("\n19. aggregate_daily_to_wc_wide")
121
121
  print(" - Description: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.")
@@ -226,6 +226,16 @@ class dataprocessing:
226
226
  print(" - Description: With two matching dataset, it takes the common columns and rows and takes the difference between them, outputing a differences and total differences table")
227
227
  print(" - Usage: compare_overlap(df1, df2, date_col)")
228
228
  print(" - Example: compare_overlap(df_1, df_2, 'obs')")
229
+
230
+ print("\n41. week_commencing_2_week_commencing_conversion")
231
+ print(" - Description: Take a week commencing column say sunday and creates a new column with a different week commencing e.g. monday")
232
+ print(" - Usage: week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun')")
233
+ print(" - Example: week_commencing_2_week_commencing_conversion(df,'obs,week_commencing='mon')")
234
+
235
+ print("\n42. plot_chart")
236
+ print(" - Description: Plots a range of charts including line, area, scatter, bubble, bar etc.")
237
+ print(" - Usage: plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs)")
238
+ print(" - Example: plot_chart(df, 'obs', df.cols, chart_type='line', title='Spend Over Time', x_title='Date', y_title='Spend')")
229
239
 
230
240
 
231
241
 
@@ -1488,7 +1498,7 @@ class dataprocessing:
1488
1498
  df[new_col_name] = df[column_name].apply(categorize_text)
1489
1499
  return df
1490
1500
 
1491
- def compare_overlap(self, df1, df2, date_col):
1501
+ def compare_overlap(self,df1, df2, date_col):
1492
1502
  """
1493
1503
  Compare overlapping periods between two DataFrames and provide a summary of total differences.
1494
1504
 
@@ -1515,21 +1525,23 @@ class dataprocessing:
1515
1525
  # Merge the dataframes on the date column to align data for comparison
1516
1526
  merged_df = pd.merge(df1_overlap, df2_overlap, on=date_col, suffixes=('_df1', '_df2'))
1517
1527
 
1528
+ # Get the common columns between the two DataFrames, excluding the date column
1529
+ common_cols = [col for col in df1.columns if col != date_col and col in df2.columns]
1530
+
1518
1531
  # Initialize a list to collect total differences for each column
1519
1532
  total_diff_list = []
1520
1533
 
1521
- # Compare the values in each column (excluding the date column)
1534
+ # Create a DataFrame for the differences
1522
1535
  diff_df = pd.DataFrame({date_col: merged_df[date_col]}) # Initialize diff_df with the date column
1523
1536
 
1524
- for col in df1.columns:
1525
- if col != date_col:
1526
- # Calculate the difference for each row
1527
- diff_col = f'diff_{col}'
1528
- diff_df[diff_col] = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']
1529
-
1530
- # Calculate the total difference for the column and add it to the list
1531
- total_diff = diff_df[diff_col].sum()
1532
- total_diff_list.append({'Column': col, 'Total Difference': total_diff})
1537
+ for col in common_cols:
1538
+ # Calculate the difference for each row
1539
+ diff_col = f'diff_{col}'
1540
+ diff_df[diff_col] = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']
1541
+
1542
+ # Calculate the total difference for the column and add it to the list
1543
+ total_diff = diff_df[diff_col].sum()
1544
+ total_diff_list.append({'Column': col, 'Total Difference': total_diff})
1533
1545
 
1534
1546
  # Create a DataFrame for the summary of total differences
1535
1547
  total_diff_df = pd.DataFrame(total_diff_list)
@@ -1541,6 +1553,162 @@ class dataprocessing:
1541
1553
 
1542
1554
  return diff_df, total_diff_df
1543
1555
 
1556
+ # Convert a week-commencing column to a different week-start day (most likely Monday to Sunday or vice versa)
1557
+ def week_commencing_2_week_commencing_conversion(self,df,date_col,week_commencing='sun'):
1558
+ """
1559
+ Map each date in a DataFrame's week-commencing column to the most recent occurrence (on or before that date) of another specified weekday.
1560
+
1561
+ Args:
1562
+ df (pandas.DataFrame): The DataFrame containing the date-based data.
1563
+ date_col (str): The name of the date column in the DataFrame.
1564
+ week_commencing (str, optional): The day of the week that the new week starts on. Must be one of 'mon', 'tue', 'wed', 'thur', 'fri', 'sat', 'sun' (note 'thur', not 'thu'); any other value raises a KeyError. Defaults to 'sun'.
1565
+
1566
+ Returns:
1567
+ pandas.DataFrame: The original DataFrame (modified in place) with an additional column named 'week_start_<week_commencing>' holding the converted week-start dates.
1568
+ """
1569
+ # Week commencing dictionary
1570
+ day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
1571
+ df['week_start_'+ week_commencing] = df[date_col].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
1572
+
1573
+ return df
1574
+
1575
+ def plot_chart(self, df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs):
1576
+ """
1577
+ Plot various types of charts using Plotly.
1578
+
1579
+ Args:
1580
+ df (pandas.DataFrame): DataFrame containing the data.
1581
+ date_col (str): The name of the column with date information.
1582
+ value_cols (list): List of columns to plot.
1583
+ chart_type (str): Type of chart to plot ('line', 'bar', 'scatter', 'histogram', 'pie', 'box', 'heatmap', 'area', 'bubble', 'funnel', 'waterfall', 'contour', 'scatter3d').
1584
+ title (str): Title of the chart.
1585
+ x_title (str): Title of the x-axis.
1586
+ y_title (str): Title of the y-axis.
1587
+ **kwargs: Additional keyword arguments for customization.
1588
+
1589
+ Returns:
1590
+ plotly.graph_objects.Figure: The Plotly figure object.
1591
+ """
1592
+ # Ensure the date column is in datetime format
1593
+ df[date_col] = pd.to_datetime(df[date_col])
1594
+
1595
+ # Initialize the figure
1596
+ fig = go.Figure()
1597
+
1598
+ # Make sure the date col is excluded from the line cols
1599
+ value_cols = [x for x in value_cols if x!=date_col]
1600
+
1601
+ # Add each value column to the plot based on the chart type
1602
+ for col in value_cols:
1603
+ if chart_type == 'line':
1604
+ fig.add_trace(go.Scatter(
1605
+ x=df[date_col],
1606
+ y=df[col],
1607
+ mode='lines',
1608
+ name=col,
1609
+ **kwargs
1610
+ ))
1611
+ elif chart_type == 'bar':
1612
+ fig.add_trace(go.Bar(
1613
+ x=df[date_col],
1614
+ y=df[col],
1615
+ name=col,
1616
+ **kwargs
1617
+ ))
1618
+ elif chart_type == 'scatter':
1619
+ fig.add_trace(go.Scatter(
1620
+ x=df[date_col],
1621
+ y=df[col],
1622
+ mode='markers',
1623
+ name=col,
1624
+ **kwargs
1625
+ ))
1626
+ elif chart_type == 'histogram':
1627
+ fig.add_trace(go.Histogram(
1628
+ x=df[col],
1629
+ name=col,
1630
+ **kwargs
1631
+ ))
1632
+ elif chart_type == 'pie':
1633
+ fig.add_trace(go.Pie(
1634
+ labels=df[date_col], # or another column for labels
1635
+ values=df[col],
1636
+ name=col,
1637
+ **kwargs
1638
+ ))
1639
+ elif chart_type == 'box':
1640
+ fig.add_trace(go.Box(
1641
+ y=df[col],
1642
+ name=col,
1643
+ **kwargs
1644
+ ))
1645
+ elif chart_type == 'heatmap':
1646
+ fig.add_trace(go.Heatmap(
1647
+ z=df.pivot_table(index=date_col, columns=value_cols[0], values=value_cols[1]),
1648
+ x=df[value_cols[0]],
1649
+ y=df[date_col],
1650
+ **kwargs
1651
+ ))
1652
+ elif chart_type == 'area':
1653
+ fig.add_trace(go.Scatter(
1654
+ x=df[date_col],
1655
+ y=df[col],
1656
+ mode='lines', # Use 'lines+markers' if you want markers
1657
+ fill='tozeroy', # Fill the area under the line
1658
+ name=col,
1659
+ **kwargs
1660
+ ))
1661
+ elif chart_type == 'bubble':
1662
+ fig.add_trace(go.Scatter(
1663
+ x=df[value_cols[0]],
1664
+ y=df[value_cols[1]],
1665
+ mode='markers',
1666
+ marker=dict(size=df[value_cols[2]]),
1667
+ name='Bubble Chart',
1668
+ **kwargs
1669
+ ))
1670
+ elif chart_type == 'funnel':
1671
+ fig.add_trace(go.Funnel(
1672
+ y=df[date_col],
1673
+ x=df[col],
1674
+ **kwargs
1675
+ ))
1676
+ elif chart_type == 'waterfall':
1677
+ fig.add_trace(go.Waterfall(
1678
+ x=df[date_col],
1679
+ y=df[col],
1680
+ measure=df[value_cols[1]], # measures like 'increase', 'decrease', 'total'
1681
+ **kwargs
1682
+ ))
1683
+ elif chart_type == 'contour':
1684
+ fig.add_trace(go.Contour(
1685
+ z=df.pivot_table(index=value_cols[0], columns=value_cols[1], values=value_cols[2]),
1686
+ x=df[value_cols[0]],
1687
+ y=df[value_cols[1]],
1688
+ **kwargs
1689
+ ))
1690
+ elif chart_type == 'scatter3d':
1691
+ fig.add_trace(go.Scatter3d(
1692
+ x=df[value_cols[0]],
1693
+ y=df[value_cols[1]],
1694
+ z=df[value_cols[2]],
1695
+ mode='markers',
1696
+ **kwargs
1697
+ ))
1698
+ else:
1699
+ raise ValueError(f"Unsupported chart type: {chart_type}")
1700
+
1701
+ # Update the layout of the figure
1702
+ fig.update_layout(
1703
+ title=title,
1704
+ xaxis_title=x_title,
1705
+ yaxis_title=y_title,
1706
+ legend_title='Series',
1707
+ template='plotly_dark'
1708
+ )
1709
+
1710
+ return fig
1711
+
1544
1712
  ########################################################################################################################################
1545
1713
  ########################################################################################################################################
1546
1714
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.2.6
3
+ Version: 0.6.2.8
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -3,14 +3,14 @@ dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nF
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
4
  imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
5
5
  imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
6
- imsciences/datafunctions.py,sha256=m5fXoFDJ9bkYMA3tIxsTOr2NmxlWSuoqqi5IvNO2RC8,144041
6
+ imsciences/datafunctions.py,sha256=CWdwme486HMYDn5nshGgayy76PyKEUwZQyN2CkXWA3M,151392
7
7
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
8
8
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
9
9
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
10
10
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
11
11
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
12
- imsciences-0.6.2.6.dist-info/METADATA,sha256=SmZRpsI3cGnW8fQgXurxtsmJgg1GhlotKPOFZ1iPs6o,854
13
- imsciences-0.6.2.6.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
14
- imsciences-0.6.2.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
- imsciences-0.6.2.6.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
16
- imsciences-0.6.2.6.dist-info/RECORD,,
12
+ imsciences-0.6.2.8.dist-info/METADATA,sha256=t1SXLwd-9YFA09A3vFdEoNjDkwIZR2hkE8Aecf10tY0,854
13
+ imsciences-0.6.2.8.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
14
+ imsciences-0.6.2.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
+ imsciences-0.6.2.8.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
16
+ imsciences-0.6.2.8.dist-info/RECORD,,