imsciences 0.6.2.6__py3-none-any.whl → 0.6.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +181 -13
- {imsciences-0.6.2.6.dist-info → imsciences-0.6.2.7.dist-info}/METADATA +1 -1
- {imsciences-0.6.2.6.dist-info → imsciences-0.6.2.7.dist-info}/RECORD +6 -6
- {imsciences-0.6.2.6.dist-info → imsciences-0.6.2.7.dist-info}/PKG-INFO-IMS-24Ltp-3 +0 -0
- {imsciences-0.6.2.6.dist-info → imsciences-0.6.2.7.dist-info}/WHEEL +0 -0
- {imsciences-0.6.2.6.dist-info → imsciences-0.6.2.7.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
|
@@ -114,8 +114,8 @@ class dataprocessing:
|
|
|
114
114
|
|
|
115
115
|
print("\n18. apply_lookup_table_for_columns")
|
|
116
116
|
print(" - Description: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.")
|
|
117
|
-
print(" - Usage:
|
|
118
|
-
print(" - Example:
|
|
117
|
+
print(" - Usage: apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')")
|
|
118
|
+
print(" - Example: apply_lookup_table_for_columns(df, col_names, {'spend':'spd','clicks':'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')")
|
|
119
119
|
|
|
120
120
|
print("\n19. aggregate_daily_to_wc_wide")
|
|
121
121
|
print(" - Description: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.")
|
|
@@ -226,6 +226,16 @@ class dataprocessing:
|
|
|
226
226
|
print(" - Description: With two matching dataset, it takes the common columns and rows and takes the difference between them, outputing a differences and total differences table")
|
|
227
227
|
print(" - Usage: compare_overlap(df1, df2, date_col)")
|
|
228
228
|
print(" - Example: compare_overlap(df_1, df_2, 'obs')")
|
|
229
|
+
|
|
230
|
+
print("\n41. week_commencing_2_week_commencing_conversion")
|
|
231
|
+
print(" - Description: Take a week commencing column say sunday and creates a new column with a different week commencing e.g. monday")
|
|
232
|
+
print(" - Usage: week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun')")
|
|
233
|
+
print(" - Example: week_commencing_2_week_commencing_conversion(df,'obs,week_commencing='mon')")
|
|
234
|
+
|
|
235
|
+
print("\n42. week_commencing_2_week_commencing_conversion")
|
|
236
|
+
print(" - Description: Take a week commencing column say sunday and creates a new column with a different week commencing e.g. monday")
|
|
237
|
+
print(" - Usage: week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun')")
|
|
238
|
+
print(" - Example: week_commencing_2_week_commencing_conversion(df,'obs,week_commencing='mon')")
|
|
229
239
|
|
|
230
240
|
|
|
231
241
|
|
|
@@ -1488,7 +1498,7 @@ class dataprocessing:
|
|
|
1488
1498
|
df[new_col_name] = df[column_name].apply(categorize_text)
|
|
1489
1499
|
return df
|
|
1490
1500
|
|
|
1491
|
-
def compare_overlap(
|
|
1501
|
+
def compare_overlap(df1, df2, date_col):
|
|
1492
1502
|
"""
|
|
1493
1503
|
Compare overlapping periods between two DataFrames and provide a summary of total differences.
|
|
1494
1504
|
|
|
@@ -1515,21 +1525,23 @@ class dataprocessing:
|
|
|
1515
1525
|
# Merge the dataframes on the date column to align data for comparison
|
|
1516
1526
|
merged_df = pd.merge(df1_overlap, df2_overlap, on=date_col, suffixes=('_df1', '_df2'))
|
|
1517
1527
|
|
|
1528
|
+
# Get the common columns between the two DataFrames, excluding the date column
|
|
1529
|
+
common_cols = [col for col in df1.columns if col != date_col and col in df2.columns]
|
|
1530
|
+
|
|
1518
1531
|
# Initialize a list to collect total differences for each column
|
|
1519
1532
|
total_diff_list = []
|
|
1520
1533
|
|
|
1521
|
-
#
|
|
1534
|
+
# Create a DataFrame for the differences
|
|
1522
1535
|
diff_df = pd.DataFrame({date_col: merged_df[date_col]}) # Initialize diff_df with the date column
|
|
1523
1536
|
|
|
1524
|
-
for col in
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
total_diff_list.append({'Column': col, 'Total Difference': total_diff})
|
|
1537
|
+
for col in common_cols:
|
|
1538
|
+
# Calculate the difference for each row
|
|
1539
|
+
diff_col = f'diff_{col}'
|
|
1540
|
+
diff_df[diff_col] = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']
|
|
1541
|
+
|
|
1542
|
+
# Calculate the total difference for the column and add it to the list
|
|
1543
|
+
total_diff = diff_df[diff_col].sum()
|
|
1544
|
+
total_diff_list.append({'Column': col, 'Total Difference': total_diff})
|
|
1533
1545
|
|
|
1534
1546
|
# Create a DataFrame for the summary of total differences
|
|
1535
1547
|
total_diff_df = pd.DataFrame(total_diff_list)
|
|
@@ -1541,6 +1553,162 @@ class dataprocessing:
|
|
|
1541
1553
|
|
|
1542
1554
|
return diff_df, total_diff_df
|
|
1543
1555
|
|
|
1556
|
+
# Convert a week-commencing date column to a different week start day (most commonly Monday to Sunday, or vice versa)
|
|
1557
|
+
def week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun'):
    """
    Add a column giving, for each date, the start of its week for a chosen week-commencing day.

    Each value in ``date_col`` is moved back to the most recent occurrence of
    ``week_commencing`` (a date already on that weekday is left unchanged).
    The result is written to a new column named ``'week_start_' + week_commencing``.

    Args:
        df (pandas.DataFrame): The DataFrame containing the date-based data. It is modified in place.
        date_col (str): The name of the date column in the DataFrame. Its values must expose a
            ``weekday()`` method and support subtraction of a ``pandas.Timedelta``.
        week_commencing (str, optional): The day of the week that the week starts on
            ('mon', 'tue', 'wed', 'thu' or 'thur', 'fri', 'sat', 'sun'). Defaults to 'sun'.

    Returns:
        pandas.DataFrame: The same DataFrame object with the additional week-start column.

    Raises:
        KeyError: If ``week_commencing`` is not one of the supported day abbreviations.
    """
    # Weekday numbers follow the ``weekday()`` convention (Monday == 0).
    # 'thu' is accepted alongside the legacy 'thur' spelling so that every day has a
    # three-letter code, as the argument documentation describes.
    day_dict = {"mon": 0, "tue": 1, "wed": 2, "thu": 3, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

    # Validate before touching the data so a bad argument fails immediately with a
    # readable message, rather than from inside the per-row lambda (and even when the
    # frame is empty). KeyError is kept so existing callers catching it still work.
    if week_commencing not in day_dict:
        raise KeyError(
            f"Unsupported week_commencing {week_commencing!r}; expected one of {sorted(day_dict)}"
        )
    target_weekday = day_dict[week_commencing]

    # (weekday - target) % 7 is the number of days since the last target weekday.
    df['week_start_' + week_commencing] = df[date_col].apply(
        lambda x: x - pd.Timedelta(days=(x.weekday() - target_weekday) % 7)
    )

    return df
|
|
1574
|
+
|
|
1575
|
+
def plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs):
    """
    Plot various types of charts using Plotly.

    Per-column chart types ('line', 'bar', 'scatter', 'histogram', 'pie', 'box',
    'area', 'funnel', 'waterfall') add one trace for each entry of ``value_cols``.
    Combined chart types ('heatmap', 'bubble', 'contour', 'scatter3d') read their
    inputs positionally from ``value_cols`` and add a single trace.

    Args:
        df (pandas.DataFrame): DataFrame containing the data. It is not modified.
        date_col (str): The name of the column with date information.
        value_cols (list): List of columns to plot. ``date_col`` is ignored if present.
            Positional use by chart type:
              - 'heatmap': [pivot columns, pivot values]
              - 'bubble': [x, y, marker size]
              - 'contour': [pivot index / x, pivot columns / y, pivot values]
              - 'scatter3d': [x, y, z]
              - 'waterfall': the second entry is also passed as ``measure`` for every trace
        chart_type (str): Type of chart to plot ('line', 'bar', 'scatter', 'histogram', 'pie', 'box', 'heatmap', 'area', 'bubble', 'funnel', 'waterfall', 'contour', 'scatter3d').
        title (str): Title of the chart.
        x_title (str): Title of the x-axis.
        y_title (str): Title of the y-axis.
        **kwargs: Additional keyword arguments forwarded to every trace constructor.

    Returns:
        plotly.graph_objects.Figure: The Plotly figure object.

    Raises:
        ValueError: If ``chart_type`` is not one of the supported types.
        IndexError: If a chart type that reads ``value_cols`` positionally is given
            a non-empty list with too few columns.
    """
    # Chart types that draw one trace per value column.
    per_column_types = ('line', 'bar', 'scatter', 'histogram', 'pie', 'box', 'area', 'funnel', 'waterfall')
    # Chart types whose single trace is built from several value columns at once.
    combined_types = ('heatmap', 'bubble', 'contour', 'scatter3d')

    # Reject bad chart types up front, before any work is done and regardless of
    # whether value_cols happens to be empty.
    if chart_type not in per_column_types + combined_types:
        raise ValueError(f"Unsupported chart type: {chart_type}")

    # Work on a copy so the caller's DataFrame keeps its original date column.
    plot_df = df.copy()
    plot_df[date_col] = pd.to_datetime(plot_df[date_col])

    # Initialize the figure
    fig = go.Figure()

    # Make sure the date col is excluded from the value cols
    value_cols = [x for x in value_cols if x != date_col]

    def build_column_trace(col):
        # Build the trace for a single value column of a per-column chart type.
        dates = plot_df[date_col]
        values = plot_df[col]
        if chart_type == 'line':
            return go.Scatter(x=dates, y=values, mode='lines', name=col, **kwargs)
        if chart_type == 'bar':
            return go.Bar(x=dates, y=values, name=col, **kwargs)
        if chart_type == 'scatter':
            return go.Scatter(x=dates, y=values, mode='markers', name=col, **kwargs)
        if chart_type == 'histogram':
            return go.Histogram(x=values, name=col, **kwargs)
        if chart_type == 'pie':
            # The date column supplies the slice labels.
            return go.Pie(labels=dates, values=values, name=col, **kwargs)
        if chart_type == 'box':
            return go.Box(y=values, name=col, **kwargs)
        if chart_type == 'area':
            # fill='tozeroy' shades the area between the line and y=0.
            return go.Scatter(x=dates, y=values, mode='lines', fill='tozeroy', name=col, **kwargs)
        if chart_type == 'funnel':
            return go.Funnel(y=dates, x=values, **kwargs)
        # Only 'waterfall' remains; the second value column supplies the measure of each bar.
        return go.Waterfall(x=dates, y=values, measure=plot_df[value_cols[1]], **kwargs)

    def build_combined_trace():
        # Build the single trace of a chart type that combines several value columns.
        if chart_type == 'heatmap':
            # NOTE(review): x/y are the raw columns rather than the pivot's
            # columns/index, as in the original - confirm this is intended.
            return go.Heatmap(
                z=plot_df.pivot_table(index=date_col, columns=value_cols[0], values=value_cols[1]),
                x=plot_df[value_cols[0]],
                y=plot_df[date_col],
                **kwargs
            )
        if chart_type == 'bubble':
            return go.Scatter(
                x=plot_df[value_cols[0]],
                y=plot_df[value_cols[1]],
                mode='markers',
                marker=dict(size=plot_df[value_cols[2]]),
                name='Bubble Chart',
                **kwargs
            )
        if chart_type == 'contour':
            return go.Contour(
                z=plot_df.pivot_table(index=value_cols[0], columns=value_cols[1], values=value_cols[2]),
                x=plot_df[value_cols[0]],
                y=plot_df[value_cols[1]],
                **kwargs
            )
        # Only 'scatter3d' remains.
        return go.Scatter3d(
            x=plot_df[value_cols[0]],
            y=plot_df[value_cols[1]],
            z=plot_df[value_cols[2]],
            mode='markers',
            **kwargs
        )

    if chart_type in combined_types:
        # These traces do not depend on an individual column, so add them exactly
        # once instead of once per value column (which stacked identical traces).
        # An empty value_cols still yields an empty figure, as it did before.
        if value_cols:
            fig.add_trace(build_combined_trace())
    else:
        for col in value_cols:
            fig.add_trace(build_column_trace(col))

    # Update the layout of the figure
    fig.update_layout(
        title=title,
        xaxis_title=x_title,
        yaxis_title=y_title,
        legend_title='Series',
        template='plotly_dark'
    )

    return fig
|
|
1711
|
+
|
|
1544
1712
|
########################################################################################################################################
|
|
1545
1713
|
########################################################################################################################################
|
|
1546
1714
|
|
|
@@ -3,14 +3,14 @@ dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nF
|
|
|
3
3
|
dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
4
4
|
imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
|
|
5
5
|
imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
|
|
6
|
-
imsciences/datafunctions.py,sha256=
|
|
6
|
+
imsciences/datafunctions.py,sha256=PGuvgJIurXGWM8E1M_w9BijUJGBm5FTaZVE-C1_sPog,151382
|
|
7
7
|
imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
|
|
8
8
|
imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
9
9
|
imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
10
10
|
imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
11
11
|
imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
12
|
-
imsciences-0.6.2.
|
|
13
|
-
imsciences-0.6.2.
|
|
14
|
-
imsciences-0.6.2.
|
|
15
|
-
imsciences-0.6.2.
|
|
16
|
-
imsciences-0.6.2.
|
|
12
|
+
imsciences-0.6.2.7.dist-info/METADATA,sha256=0IT7pWYxsHXerkqBVKsS2Zh1_6qwn1u7NL3mK44c4tk,854
|
|
13
|
+
imsciences-0.6.2.7.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
|
|
14
|
+
imsciences-0.6.2.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
15
|
+
imsciences-0.6.2.7.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
|
|
16
|
+
imsciences-0.6.2.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|