opsci-toolbox 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opsci_toolbox/apis/rapidapi_helpers.py +120 -21
- opsci_toolbox/apis/webscraping.py +186 -59
- opsci_toolbox/apis/youtube_helpers.py +103 -16
- opsci_toolbox/helpers/common.py +368 -254
- opsci_toolbox/helpers/cv.py +50 -60
- opsci_toolbox/helpers/dataviz.py +255 -184
- opsci_toolbox/helpers/dates.py +17 -18
- opsci_toolbox/helpers/nlp.py +154 -114
- opsci_toolbox/helpers/nlp_cuml.py +389 -36
- opsci_toolbox/helpers/sna.py +509 -0
- opsci_toolbox/helpers/sql.py +53 -0
- {opsci_toolbox-0.0.6.dist-info → opsci_toolbox-0.0.8.dist-info}/METADATA +14 -9
- opsci_toolbox-0.0.8.dist-info/RECORD +22 -0
- opsci_toolbox-0.0.6.dist-info/RECORD +0 -21
- {opsci_toolbox-0.0.6.dist-info → opsci_toolbox-0.0.8.dist-info}/WHEEL +0 -0
- {opsci_toolbox-0.0.6.dist-info → opsci_toolbox-0.0.8.dist-info}/top_level.txt +0 -0
opsci_toolbox/helpers/dataviz.py
CHANGED
@@ -373,7 +373,7 @@ def get_convex_hull_coord(points: np.array, interpolate_curve: bool = True) -> t
|
|
373
373
|
|
374
374
|
# return fig
|
375
375
|
|
376
|
-
def create_scatter_plot(df: pd.DataFrame, col_x: str, col_y: str, col_category: str, color_palette: dict, col_color: str, col_size: str, col_text: str, col_legend: list = [], title: str = "Scatter Plot", x_axis_label: str = "X-axis", y_axis_label: str = "Y-axis", width: int = 1000, height: int = 1000, xaxis_range: list =None, yaxis_range: list =None, size_value: int = 4, opacity: float = 0.8, maxdisplayed: int = 0, mode: str = "markers", textposition: str = "bottom center", plot_bgcolor: str = None, paper_bgcolor: str = None, yaxis_showgrid: bool = False, xaxis_showgrid: bool = False, color: str = "indianred", line_width: float = 0.5, line_color: str = "white", colorscale: str = 'Viridis', showscale: bool = True, template: str = "plotly") -> go.Figure:
|
376
|
+
def create_scatter_plot(df: pd.DataFrame, col_x: str, col_y: str, col_category: str, color_palette: dict, col_color: str, col_size: str, col_text: str, col_legend: list = [], title: str = "Scatter Plot", x_axis_label: str = "X-axis", y_axis_label: str = "Y-axis", width: int = 1000, height: int = 1000, xaxis_range: list =None, yaxis_range: list =None, size_value: int = 4, opacity: float = 0.8, maxdisplayed: int = 0, mode: str = "markers", textposition: str = "bottom center", plot_bgcolor: str = None, paper_bgcolor: str = None, yaxis_showgrid: bool = False, xaxis_showgrid: bool = False, color: str = "indianred", line_width: float = 0.5, line_color: str = "white", colorscale: str = 'Viridis', showscale: bool = True, template: str = "plotly", font_size:int =16) -> go.Figure:
|
377
377
|
"""
|
378
378
|
Create a scatter plot.
|
379
379
|
|
@@ -522,7 +522,8 @@ def create_scatter_plot(df: pd.DataFrame, col_x: str, col_y: str, col_category:
|
|
522
522
|
template=template,
|
523
523
|
plot_bgcolor=plot_bgcolor, #background color (plot)
|
524
524
|
paper_bgcolor=paper_bgcolor, #background color (around plot)
|
525
|
-
font_family="
|
525
|
+
font_family="Inria Sans", # font
|
526
|
+
font_size=font_size
|
526
527
|
|
527
528
|
)
|
528
529
|
return fig
|
@@ -557,7 +558,7 @@ def add_annotations(fig: go.Figure, df: pd.DataFrame, col_x: str, col_y: str, co
|
|
557
558
|
showarrow=True,
|
558
559
|
arrowhead=1,
|
559
560
|
font=dict(
|
560
|
-
family="
|
561
|
+
family="Inria Sans",
|
561
562
|
size=width / label_size_ratio,
|
562
563
|
color=font_color
|
563
564
|
),
|
@@ -571,7 +572,7 @@ def add_annotations(fig: go.Figure, df: pd.DataFrame, col_x: str, col_y: str, co
|
|
571
572
|
|
572
573
|
return fig
|
573
574
|
|
574
|
-
def scatter3D(df: pd.DataFrame, col_x: str, col_y: str, col_z: str, col_category: str, color_palette: dict, col_size: str, col_text: str, title: str = "3D Scatter Plot", x_axis_label: str = "X-axis", y_axis_label: str = "Y-axis", z_axis_label: str = "Z-axis", width: int = 1000, height: int = 1000, xaxis_range: list = None, yaxis_range: list = None, zaxis_range: list = None, size_value: int = 4, opacity: float = 0.8, plot_bgcolor: str = None, paper_bgcolor: str = None, color: str = "indianred", line_width: float = 0.5, line_color: str = "white", template: str = "plotly") -> go.Figure:
|
575
|
+
def scatter3D(df: pd.DataFrame, col_x: str, col_y: str, col_z: str, col_category: str, color_palette: dict, col_size: str, col_text: str, title: str = "3D Scatter Plot", x_axis_label: str = "X-axis", y_axis_label: str = "Y-axis", z_axis_label: str = "Z-axis", width: int = 1000, height: int = 1000, xaxis_range: list = None, yaxis_range: list = None, zaxis_range: list = None, size_value: int = 4, opacity: float = 0.8, plot_bgcolor: str = None, paper_bgcolor: str = None, color: str = "indianred", line_width: float = 0.5, line_color: str = "white", template: str = "plotly", font_size:int =16) -> go.Figure:
|
575
576
|
"""
|
576
577
|
Create a 3D scatter plot.
|
577
578
|
|
@@ -677,7 +678,8 @@ def scatter3D(df: pd.DataFrame, col_x: str, col_y: str, col_z: str, col_category
|
|
677
678
|
zaxis_range = [df[col_z].min()-0.1,df[col_z].max()+0.1]
|
678
679
|
fig.update_layout(
|
679
680
|
|
680
|
-
font_family="
|
681
|
+
font_family="Inria Sans", # font
|
682
|
+
font_size = font_size,
|
681
683
|
title=title, #graph title
|
682
684
|
xaxis_title=x_axis_label, #xaxis title
|
683
685
|
yaxis_title=y_axis_label, #yaxis title
|
@@ -723,7 +725,7 @@ def scatter3D(df: pd.DataFrame, col_x: str, col_y: str, col_z: str, col_category
|
|
723
725
|
return fig
|
724
726
|
|
725
727
|
|
726
|
-
def fig_bar_trend(df: pd.DataFrame, col_x: str, bar_measure: str, trend_measure: str, x_name: str = "X", bar_name: str = "metric1", trend_name: str = "metric2", marker_color: str = '
|
728
|
+
def fig_bar_trend(df: pd.DataFrame, col_x: str, bar_measure: str, trend_measure: str, x_name: str = "X", bar_name: str = "metric1", trend_name: str = "metric2", marker_color: str = '#d399ff', line_color: str = '#bd66ff', title_text: str = "Couverture & Résonance", width: int = 1500, height: int = 700, xaxis_tickangle: int = 0, opacity: float = 0.8, plot_bgcolor: str = None, paper_bgcolor: str = None, template: str = "plotly", font_size:int =16) -> go.Figure:
|
727
729
|
"""
|
728
730
|
Display a graph that combines bar and trend chart to compare 2 metrics.
|
729
731
|
|
@@ -800,7 +802,8 @@ def fig_bar_trend(df: pd.DataFrame, col_x: str, bar_measure: str, trend_measure:
|
|
800
802
|
xaxis_showgrid=False,
|
801
803
|
yaxis_showline=False,
|
802
804
|
yaxis_showgrid=False,
|
803
|
-
font_family="
|
805
|
+
font_family="Inria Sans",
|
806
|
+
font_size = font_size,
|
804
807
|
template=template,
|
805
808
|
plot_bgcolor=plot_bgcolor, #background color (plot)
|
806
809
|
paper_bgcolor=paper_bgcolor, #background color (around plot)
|
@@ -932,7 +935,8 @@ def density_map(df_posts: pd.DataFrame,
|
|
932
935
|
opacity: float = 0.3,
|
933
936
|
plot_bgcolor: str = None,
|
934
937
|
paper_bgcolor: str = None,
|
935
|
-
template: str = "plotly"
|
938
|
+
template: str = "plotly",
|
939
|
+
font_size:int = 16) -> go.Figure:
|
936
940
|
"""
|
937
941
|
Display a 2D histogram with contours and scattered dots.
|
938
942
|
|
@@ -944,7 +948,7 @@ def density_map(df_posts: pd.DataFrame,
|
|
944
948
|
col_engagement (str): Column name corresponding to a metric.
|
945
949
|
col_text (str): Column name corresponding to a text separated by |.
|
946
950
|
col_text_dots (str): Column name corresponding to the text for dots.
|
947
|
-
colorscale (str, optional): Possible values are https://plotly.com/python/builtin-colorscales
|
951
|
+
colorscale (str, optional): Possible values are 'https://plotly.com/python/builtin-colorscales/'. Defaults to "Portland".
|
948
952
|
marker_color (str, optional): Dots color value. Defaults to "#ff7f0e".
|
949
953
|
arrow_color (str, optional): Arrow pointing to topic centroid color value. Defaults to "#ff7f0e".
|
950
954
|
width (int, optional): Width of the plot. Defaults to 1000.
|
@@ -996,15 +1000,16 @@ def density_map(df_posts: pd.DataFrame,
|
|
996
1000
|
|
997
1001
|
#paramètres cosmetiques
|
998
1002
|
fig_density.update_layout(
|
999
|
-
|
1003
|
+
font_family="Inria Sans", # font
|
1004
|
+
font_size = font_size,
|
1000
1005
|
width=width,
|
1001
1006
|
height=height,
|
1002
|
-
margin=dict(
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
),
|
1007
|
+
# margin=dict(
|
1008
|
+
# t=width / 15,
|
1009
|
+
# b=width / 25,
|
1010
|
+
# r=width / 25,
|
1011
|
+
# l=width / 25,
|
1012
|
+
# ),
|
1008
1013
|
title=dict(text=title_text, font=dict(size=width / 40)),
|
1009
1014
|
xaxis=dict(showline=False, zeroline=False, showgrid=False, showticklabels=False),
|
1010
1015
|
yaxis=dict(showline=False, zeroline=False, showgrid=False, showticklabels=False),
|
@@ -1045,7 +1050,7 @@ def density_map(df_posts: pd.DataFrame,
|
|
1045
1050
|
showarrow=True,
|
1046
1051
|
arrowhead=1,
|
1047
1052
|
font=dict(
|
1048
|
-
family="
|
1053
|
+
family="Inria Sans",
|
1049
1054
|
size=width / label_size_ratio,
|
1050
1055
|
color="blue",
|
1051
1056
|
),
|
@@ -1089,7 +1094,8 @@ def topic_heatmap(df: pd.DataFrame,
|
|
1089
1094
|
col_y: str = "topic_y",
|
1090
1095
|
col_topic: str = "soft_topic",
|
1091
1096
|
color_continuous_scale: str = 'GnBu',
|
1092
|
-
title: str = "Similarity between topics"
|
1097
|
+
title: str = "Similarity between topics",
|
1098
|
+
font_size:int = 16) -> go.Figure:
|
1093
1099
|
"""
|
1094
1100
|
Display a heatmap representing the similarity between topics.
|
1095
1101
|
|
@@ -1115,6 +1121,8 @@ def topic_heatmap(df: pd.DataFrame,
|
|
1115
1121
|
)
|
1116
1122
|
|
1117
1123
|
fig.update_layout(
|
1124
|
+
font_family="Inria Sans", # font
|
1125
|
+
font_size = font_size,
|
1118
1126
|
title={
|
1119
1127
|
'text': title,
|
1120
1128
|
'y': .95,
|
@@ -1129,8 +1137,8 @@ def topic_heatmap(df: pd.DataFrame,
|
|
1129
1137
|
height=1000,
|
1130
1138
|
hoverlabel=dict(
|
1131
1139
|
bgcolor="white",
|
1132
|
-
|
1133
|
-
|
1140
|
+
font_family="Inria Sans", # font
|
1141
|
+
font_size = font_size,
|
1134
1142
|
),
|
1135
1143
|
)
|
1136
1144
|
fig.update_layout(showlegend=True)
|
@@ -1144,7 +1152,7 @@ def generate_wordcloud(df: pd.DataFrame,
|
|
1144
1152
|
height: int = 1500,
|
1145
1153
|
dpi: int = 300,
|
1146
1154
|
background_color: str = 'white',
|
1147
|
-
font_path: str = "font/
|
1155
|
+
font_path: str = "font/InriaSans-Bold.ttf",
|
1148
1156
|
colormap: str = "Viridis",
|
1149
1157
|
show: bool = False) -> WordCloud:
|
1150
1158
|
"""
|
@@ -1186,7 +1194,8 @@ def create_radar(df: pd.DataFrame,
|
|
1186
1194
|
height: int = 1000,
|
1187
1195
|
template: str = "ggplot2",
|
1188
1196
|
plot_bgcolor: str = None,
|
1189
|
-
paper_bgcolor: str = None
|
1197
|
+
paper_bgcolor: str = None,
|
1198
|
+
font_size:int = 16) -> go.Figure:
|
1190
1199
|
"""
|
1191
1200
|
Create a radar chart.
|
1192
1201
|
|
@@ -1241,7 +1250,8 @@ def create_radar(df: pd.DataFrame,
|
|
1241
1250
|
# bgcolor="white",
|
1242
1251
|
),
|
1243
1252
|
showlegend=True,
|
1244
|
-
font_family="
|
1253
|
+
font_family="Inria Sans",
|
1254
|
+
font_size = 16,
|
1245
1255
|
font_color="SlateGrey",
|
1246
1256
|
title=title,
|
1247
1257
|
width=width, #plot size
|
@@ -1271,7 +1281,8 @@ def bar_subplots(df: pd.DataFrame,
|
|
1271
1281
|
height: int = 35,
|
1272
1282
|
plot_bgcolor: str = None,
|
1273
1283
|
paper_bgcolor: str = None,
|
1274
|
-
showlegend: bool = True
|
1284
|
+
showlegend: bool = True,
|
1285
|
+
font_size:int=16) -> go.Figure:
|
1275
1286
|
"""
|
1276
1287
|
Create subplots of horizontal bar charts.
|
1277
1288
|
|
@@ -1373,7 +1384,8 @@ def bar_subplots(df: pd.DataFrame,
|
|
1373
1384
|
uniformtext_minsize=7, # Adjust the minimum size of text to avoid overlap
|
1374
1385
|
margin=dict(l=75, r=75, t=75, b=50), # margins around the plot
|
1375
1386
|
showlegend=showlegend, # legend display
|
1376
|
-
font_family="
|
1387
|
+
font_family="Inria Sans", # font
|
1388
|
+
font_size=font_size,
|
1377
1389
|
template=template, # template, possible values : plotly, plotly_white, plotly_dark, ggplot2, seaborn, simple_white, none
|
1378
1390
|
plot_bgcolor=plot_bgcolor, # background color (plot)
|
1379
1391
|
paper_bgcolor=paper_bgcolor, # background color (around plot)
|
@@ -1407,7 +1419,8 @@ def pie_subplots(df: pd.DataFrame,
|
|
1407
1419
|
height: int = 150,
|
1408
1420
|
plot_bgcolor: str = None,
|
1409
1421
|
paper_bgcolor: str = None,
|
1410
|
-
showlegend: bool = True
|
1422
|
+
showlegend: bool = True,
|
1423
|
+
font_size=16) -> go.Figure:
|
1411
1424
|
"""
|
1412
1425
|
Create subplots of pie charts.
|
1413
1426
|
|
@@ -1476,7 +1489,8 @@ def pie_subplots(df: pd.DataFrame,
|
|
1476
1489
|
uniformtext_minsize=7,
|
1477
1490
|
margin=dict(l=75, r=75, t=75, b=50),
|
1478
1491
|
showlegend=showlegend,
|
1479
|
-
font_family="
|
1492
|
+
font_family="Inria Sans",
|
1493
|
+
font_size=font_size,
|
1480
1494
|
template=template,
|
1481
1495
|
plot_bgcolor=plot_bgcolor,
|
1482
1496
|
paper_bgcolor=paper_bgcolor,
|
@@ -1510,7 +1524,8 @@ def horizontal_stacked_bars(df: pd.DataFrame,
|
|
1510
1524
|
vertical_spacing: float = 0.08,
|
1511
1525
|
plot_bgcolor: str = None,
|
1512
1526
|
paper_bgcolor: str = None,
|
1513
|
-
template: str = "plotly"
|
1527
|
+
template: str = "plotly",
|
1528
|
+
font_size: int = 16) -> go.Figure:
|
1514
1529
|
"""
|
1515
1530
|
Create horizontal stacked bar plots.
|
1516
1531
|
|
@@ -1597,7 +1612,8 @@ def horizontal_stacked_bars(df: pd.DataFrame,
|
|
1597
1612
|
yaxis_showgrid=False,
|
1598
1613
|
uniformtext_minsize=8,
|
1599
1614
|
uniformtext_mode='hide',
|
1600
|
-
font_family="
|
1615
|
+
font_family="Inria Sans",
|
1616
|
+
font_size=font_size,
|
1601
1617
|
template=template,
|
1602
1618
|
plot_bgcolor=plot_bgcolor, #background color (plot)
|
1603
1619
|
paper_bgcolor=paper_bgcolor, #background color (around plot)
|
@@ -1624,7 +1640,8 @@ def bar_trend_per_day(df: pd.DataFrame,
|
|
1624
1640
|
line_color: str = "#273746",
|
1625
1641
|
plot_bgcolor: str = None,
|
1626
1642
|
paper_bgcolor: str = None,
|
1627
|
-
template: str = "plotly"
|
1643
|
+
template: str = "plotly",
|
1644
|
+
font_size: int = 16) -> go.Figure:
|
1628
1645
|
"""
|
1629
1646
|
Creates a Plotly stacked bar chart with a secondary line plot for two metrics over time.
|
1630
1647
|
|
@@ -1694,7 +1711,8 @@ def bar_trend_per_day(df: pd.DataFrame,
|
|
1694
1711
|
yaxis_showgrid=False,
|
1695
1712
|
uniformtext_minsize=8,
|
1696
1713
|
uniformtext_mode='hide',
|
1697
|
-
font_family="
|
1714
|
+
font_family="Inria Sans",
|
1715
|
+
font_size=font_size,
|
1698
1716
|
template=template,
|
1699
1717
|
plot_bgcolor=plot_bgcolor, #background color (plot)
|
1700
1718
|
paper_bgcolor=paper_bgcolor, #background color (around plot)
|
@@ -1731,30 +1749,31 @@ def bar_trend_per_day_per_cat(df: pd.DataFrame,
|
|
1731
1749
|
height: int = 700,
|
1732
1750
|
plot_bgcolor: str = None,
|
1733
1751
|
paper_bgcolor: str = None,
|
1734
|
-
template: str = "plotly"
|
1752
|
+
template: str = "plotly",
|
1753
|
+
font_size: int = 16) -> go.Figure:
|
1735
1754
|
"""
|
1736
1755
|
Creates a Plotly stacked bar chart with multiple categories, each represented as a separate subplot.
|
1737
1756
|
|
1738
|
-
|
1739
|
-
|
1740
|
-
|
1741
|
-
|
1742
|
-
|
1743
|
-
|
1744
|
-
|
1745
|
-
|
1746
|
-
|
1747
|
-
|
1748
|
-
|
1749
|
-
|
1750
|
-
|
1751
|
-
|
1752
|
-
|
1753
|
-
|
1754
|
-
|
1757
|
+
Args:
|
1758
|
+
df (pd.DataFrame): The DataFrame containing the data.
|
1759
|
+
col_date (str): The name of the column containing dates.
|
1760
|
+
col_cat (str): The name of the column containing categories.
|
1761
|
+
col_metric1 (str): The name of the column containing the first metric values.
|
1762
|
+
col_metric2 (str): The name of the column containing the second metric values.
|
1763
|
+
col_color (str): The name of the column containing the color codes for each category.
|
1764
|
+
xaxis_title (str, optional): The title for the x-axis. Defaults to "Date".
|
1765
|
+
y1_axis_title (str, optional): The title for the primary y-axis. Defaults to "Verbatims".
|
1766
|
+
y2_axis_title (str, optional): The title for the secondary y-axis. Defaults to "Engagements".
|
1767
|
+
title_text (str, optional): The title text for the chart. Defaults to "Trend - couverture & résonance".
|
1768
|
+
vertical_spacing (float, optional): The space between subplots. Defaults to 0.1.
|
1769
|
+
width (int, optional): The width of the chart. Defaults to 1500.
|
1770
|
+
height (int, optional): The height of the chart. Defaults to 700.
|
1771
|
+
plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
1772
|
+
paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
1773
|
+
template (str, optional): The template of the chart. Defaults to "plotly".
|
1755
1774
|
|
1756
1775
|
Returns:
|
1757
|
-
|
1776
|
+
fig (go.Figure): The Plotly Figure object representing the stacked bar chart with subplots for each category.
|
1758
1777
|
"""
|
1759
1778
|
fig = make_subplots(
|
1760
1779
|
rows = 2, # number of rows
|
@@ -1812,7 +1831,8 @@ def bar_trend_per_day_per_cat(df: pd.DataFrame,
|
|
1812
1831
|
yaxis_showgrid=False,
|
1813
1832
|
uniformtext_minsize=8,
|
1814
1833
|
uniformtext_mode='hide',
|
1815
|
-
font_family="
|
1834
|
+
font_family="Inria Sans",
|
1835
|
+
font_size=font_size,
|
1816
1836
|
template=template,
|
1817
1837
|
plot_bgcolor=plot_bgcolor, #background color (plot)
|
1818
1838
|
paper_bgcolor=paper_bgcolor, #background color (around plot)
|
@@ -1846,25 +1866,26 @@ def pie(df: pd.DataFrame,
|
|
1846
1866
|
height: int = 1000,
|
1847
1867
|
plot_bgcolor: str = None,
|
1848
1868
|
paper_bgcolor: str = None,
|
1849
|
-
showlegend: bool = True
|
1869
|
+
showlegend: bool = True,
|
1870
|
+
font_size: int = 16) -> go.Figure:
|
1850
1871
|
"""
|
1851
1872
|
Creates a Plotly pie chart.
|
1852
1873
|
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1856
|
-
|
1857
|
-
|
1858
|
-
|
1859
|
-
|
1860
|
-
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1874
|
+
Args:
|
1875
|
+
df (pd.DataFrame): The DataFrame containing the data.
|
1876
|
+
col_x (str): The name of the column containing the labels for the pie chart slices.
|
1877
|
+
col_y (str): The name of the column containing the values for the pie chart slices.
|
1878
|
+
col_color (str): The name of the column containing the colors for the pie chart slices.
|
1879
|
+
title (str, optional): The title for the pie chart. Defaults to "Sentiment".
|
1880
|
+
template (str, optional): The template of the chart. Defaults to "plotly".
|
1881
|
+
width (int, optional): The width of the chart. Defaults to 1000.
|
1882
|
+
height (int, optional): The height of the chart. Defaults to 1000.
|
1883
|
+
plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
1884
|
+
paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
1885
|
+
showlegend (bool, optional): Whether to show the legend. Defaults to True.
|
1865
1886
|
|
1866
1887
|
Returns:
|
1867
|
-
|
1888
|
+
fig (go.Figure): The Plotly Figure object representing the pie chart.
|
1868
1889
|
"""
|
1869
1890
|
fig = go.Figure()
|
1870
1891
|
fig.add_trace(go.Pie(
|
@@ -1886,7 +1907,8 @@ def pie(df: pd.DataFrame,
|
|
1886
1907
|
uniformtext_minsize=7,
|
1887
1908
|
margin=dict(l=75, r=75, t=75, b=50),
|
1888
1909
|
showlegend=showlegend,
|
1889
|
-
font_family="
|
1910
|
+
font_family="Inria Sans",
|
1911
|
+
font_size=font_size,
|
1890
1912
|
template=template,
|
1891
1913
|
plot_bgcolor=plot_bgcolor,
|
1892
1914
|
paper_bgcolor=paper_bgcolor,
|
@@ -1916,27 +1938,30 @@ def bar(df: pd.DataFrame,
|
|
1916
1938
|
plot_bgcolor: str = None,
|
1917
1939
|
paper_bgcolor: str = None,
|
1918
1940
|
template: str = "plotly",
|
1919
|
-
showlegend: bool = True
|
1941
|
+
showlegend: bool = True,
|
1942
|
+
font_size: int = 16,
|
1943
|
+
xaxis_tickangle:int=0) -> go.Figure:
|
1920
1944
|
"""
|
1921
1945
|
Creates a Plotly vertical bar chart.
|
1922
1946
|
|
1923
|
-
|
1924
|
-
|
1925
|
-
|
1926
|
-
|
1927
|
-
|
1928
|
-
|
1929
|
-
|
1930
|
-
|
1931
|
-
|
1932
|
-
|
1933
|
-
|
1934
|
-
|
1935
|
-
|
1936
|
-
|
1947
|
+
Args:
|
1948
|
+
df (pd.DataFrame): The DataFrame containing the data.
|
1949
|
+
x (str): The name of the column containing the x-axis values.
|
1950
|
+
y (str): The name of the column containing the y-axis values.
|
1951
|
+
color (str, optional): The color of the bars. Defaults to "indianred".
|
1952
|
+
xaxis_title (str, optional): The title for the x-axis. Defaults to "x".
|
1953
|
+
yaxis_title (str, optional): The title for the y-axis. Defaults to "y".
|
1954
|
+
width (int, optional): The width of the chart. Defaults to 1200.
|
1955
|
+
height (int, optional): The height of the chart. Defaults to 700.
|
1956
|
+
title_text (str, optional): The title text for the chart. Defaults to "".
|
1957
|
+
plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
1958
|
+
paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
1959
|
+
template (str, optional): The template of the chart. Defaults to "plotly".
|
1960
|
+
showlegend (bool, optional): Whether to show the legend. Defaults to True.
|
1961
|
+
xaxis_tickangle (int, optional) : label angle on x axis
|
1937
1962
|
|
1938
1963
|
Returns:
|
1939
|
-
|
1964
|
+
fig (go.Figure): The Plotly Figure object representing the vertical bar chart.
|
1940
1965
|
"""
|
1941
1966
|
fig = go.Figure()
|
1942
1967
|
fig.add_trace(
|
@@ -1959,14 +1984,15 @@ def bar(df: pd.DataFrame,
|
|
1959
1984
|
showlegend=showlegend,
|
1960
1985
|
width = width,
|
1961
1986
|
height= height,
|
1962
|
-
xaxis_tickangle=
|
1987
|
+
xaxis_tickangle=xaxis_tickangle,
|
1963
1988
|
xaxis_showline=False,
|
1964
1989
|
xaxis_showgrid=False,
|
1965
1990
|
yaxis_showline=False,
|
1966
1991
|
yaxis_showgrid=False,
|
1967
1992
|
uniformtext_minsize=8,
|
1968
1993
|
uniformtext_mode='hide',
|
1969
|
-
font_family="
|
1994
|
+
font_family="Inria Sans",
|
1995
|
+
font_size = font_size,
|
1970
1996
|
template=template,
|
1971
1997
|
plot_bgcolor=plot_bgcolor, #background color (plot)
|
1972
1998
|
paper_bgcolor=paper_bgcolor, #background color (around plot)
|
@@ -1984,17 +2010,17 @@ def add_horizontal_line(fig: go.Figure,
|
|
1984
2010
|
"""
|
1985
2011
|
Adds a horizontal line to a Plotly Figure object.
|
1986
2012
|
|
1987
|
-
|
1988
|
-
|
1989
|
-
|
1990
|
-
|
1991
|
-
|
1992
|
-
|
1993
|
-
|
1994
|
-
|
2013
|
+
Args:
|
2014
|
+
fig (go.Figure): The Plotly Figure object to which the horizontal line will be added.
|
2015
|
+
y (float): The y-coordinate of the horizontal line.
|
2016
|
+
line_color (str, optional): The color of the horizontal line. Defaults to "gray".
|
2017
|
+
line_width (float, optional): The width of the horizontal line. Defaults to 1.5.
|
2018
|
+
line_dash (str, optional): The dash style of the horizontal line. Defaults to "dash".
|
2019
|
+
annotation_text (str, optional): The text annotation associated with the horizontal line. Defaults to "Longueur moyenne des textes".
|
2020
|
+
annotation_position (str, optional): The position of the annotation relative to the horizontal line. Defaults to "top right".
|
1995
2021
|
|
1996
2022
|
Returns:
|
1997
|
-
|
2023
|
+
fig (go.Figure): The Plotly Figure object with the horizontal line added.
|
1998
2024
|
"""
|
1999
2025
|
fig.add_hline(
|
2000
2026
|
y=y,
|
@@ -2016,17 +2042,17 @@ def add_vertical_line(fig: go.Figure,
|
|
2016
2042
|
"""
|
2017
2043
|
Adds a vertical line to a Plotly Figure object.
|
2018
2044
|
|
2019
|
-
|
2020
|
-
|
2021
|
-
|
2022
|
-
|
2023
|
-
|
2024
|
-
|
2025
|
-
|
2026
|
-
|
2045
|
+
Args:
|
2046
|
+
fig (go.Figure): The Plotly Figure object to which the vertical line will be added.
|
2047
|
+
x (float): The x-coordinate of the vertical line.
|
2048
|
+
line_color (str, optional): The color of the vertical line. Defaults to "gray".
|
2049
|
+
line_width (float, optional): The width of the vertical line. Defaults to 1.5.
|
2050
|
+
line_dash (str, optional): The dash style of the vertical line. Defaults to "dash".
|
2051
|
+
annotation_text (str, optional): The text annotation associated with the vertical line. Defaults to "Longueur moyenne des textes".
|
2052
|
+
annotation_position (str, optional): The position of the annotation relative to the vertical line. Defaults to "top right".
|
2027
2053
|
|
2028
2054
|
Returns:
|
2029
|
-
|
2055
|
+
fig (go.Figure): The Plotly Figure object with the vertical line added.
|
2030
2056
|
"""
|
2031
2057
|
fig.add_vline(
|
2032
2058
|
x=x,
|
@@ -2059,28 +2085,28 @@ def network_graph(T: nx.Graph,
|
|
2059
2085
|
"""
|
2060
2086
|
Creates a network graph visualization using Plotly.
|
2061
2087
|
|
2062
|
-
|
2063
|
-
|
2064
|
-
|
2065
|
-
|
2066
|
-
|
2067
|
-
|
2068
|
-
|
2069
|
-
|
2070
|
-
|
2071
|
-
|
2072
|
-
|
2073
|
-
|
2074
|
-
|
2075
|
-
|
2076
|
-
|
2077
|
-
|
2078
|
-
|
2079
|
-
|
2080
|
-
|
2088
|
+
Args:
|
2089
|
+
T (nx.Graph): The NetworkX graph object.
|
2090
|
+
col_size (str, optional): The column name for node size. Defaults to "scaled_size".
|
2091
|
+
col_color (str, optional): The column name for node color. Defaults to "modularity_color".
|
2092
|
+
title_text (str, optional): The title for the graph. Defaults to "Analyse de similitudes".
|
2093
|
+
sample_nodes (float, optional): The proportion of nodes to sample for displaying labels. Defaults to 0.15.
|
2094
|
+
show_edges (bool, optional): Whether to display edges. Defaults to True.
|
2095
|
+
show_halo (bool, optional): Whether to display halo around nodes. Defaults to False.
|
2096
|
+
textposition (str, optional): The position of node labels. Defaults to None.
|
2097
|
+
line_color (str, optional): The color of edges. Defaults to "#B7B7B7".
|
2098
|
+
line_dash (str, optional): The dash style of edges. Defaults to "dot".
|
2099
|
+
edge_mode (str, optional): The mode for displaying edges. Defaults to "lines+markers".
|
2100
|
+
node_mode (str, optional): The mode for displaying nodes. Defaults to "markers+text".
|
2101
|
+
opacity (float, optional): The opacity of nodes. Defaults to 0.2.
|
2102
|
+
width (int, optional): The width of the plot. Defaults to 1600.
|
2103
|
+
height (int, optional): The height of the plot. Defaults to 1200.
|
2104
|
+
plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
2105
|
+
paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
2106
|
+
template (str, optional): The template of the plot. Defaults to "plotly".
|
2081
2107
|
|
2082
2108
|
Returns:
|
2083
|
-
|
2109
|
+
fig (go.Figure): The Plotly Figure object representing the network graph visualization.
|
2084
2110
|
"""
|
2085
2111
|
# on construit un dataframe des noeuds à partir des données du graphe pour plus de simplicité
|
2086
2112
|
df_nodes=pd.DataFrame()
|
@@ -2170,7 +2196,7 @@ def network_graph(T: nx.Graph,
|
|
2170
2196
|
hovermode='closest',
|
2171
2197
|
title=title_text,
|
2172
2198
|
titlefont_size=18,
|
2173
|
-
font_family="
|
2199
|
+
font_family="Inria Sans",
|
2174
2200
|
# font_size = 12,
|
2175
2201
|
# uniformtext_minsize=8,
|
2176
2202
|
template=template,
|
@@ -2198,19 +2224,20 @@ def richesse_lexicale(df: pd.DataFrame,
|
|
2198
2224
|
title: str = "Richesse lexicale",
|
2199
2225
|
width: int = 1200,
|
2200
2226
|
height: int = 1000,
|
2201
|
-
template: str = "plotly"
|
2227
|
+
template: str = "plotly",
|
2228
|
+
font_size: int = 16) -> go.Figure:
|
2202
2229
|
"""
|
2203
2230
|
Creates a lexical richness visualization using Plotly.
|
2204
2231
|
|
2205
|
-
|
2206
|
-
|
2207
|
-
|
2208
|
-
|
2209
|
-
|
2210
|
-
|
2232
|
+
Args:
|
2233
|
+
df (pd.DataFrame): The DataFrame containing word frequency data.
|
2234
|
+
title (str, optional): The title for the plot. Defaults to "Richesse lexicale".
|
2235
|
+
width (int, optional): The width of the plot. Defaults to 1200.
|
2236
|
+
height (int, optional): The height of the plot. Defaults to 1000.
|
2237
|
+
template (str, optional): The template of the plot. Defaults to "plotly".
|
2211
2238
|
|
2212
2239
|
Returns:
|
2213
|
-
|
2240
|
+
fig_richesse (go.Figure): The Plotly Figure object representing the lexical richness visualization.
|
2214
2241
|
"""
|
2215
2242
|
df = create_frequency_table(df, "freq")
|
2216
2243
|
fig_richesse = go.Figure()
|
@@ -2225,6 +2252,8 @@ def richesse_lexicale(df: pd.DataFrame,
|
|
2225
2252
|
)
|
2226
2253
|
fig_richesse.update_layout(title=title,
|
2227
2254
|
xaxis_title="Rank",
|
2255
|
+
font_family="Inria Sans",
|
2256
|
+
font_size = font_size,
|
2228
2257
|
width=width,
|
2229
2258
|
height=height,
|
2230
2259
|
template=template)
|
@@ -2237,20 +2266,21 @@ def richesse_lexicale_per_topic(df: pd.DataFrame,
|
|
2237
2266
|
title: str = "Richesse lexicale par topic",
|
2238
2267
|
width: int = 1200,
|
2239
2268
|
height: int = 1000,
|
2240
|
-
template: str = "plotly"
|
2269
|
+
template: str = "plotly",
|
2270
|
+
font_size: int = 16) -> go.Figure:
|
2241
2271
|
"""
|
2242
2272
|
Creates a lexical richness visualization per topic using Plotly.
|
2243
2273
|
|
2244
|
-
|
2245
|
-
|
2246
|
-
|
2247
|
-
|
2248
|
-
|
2249
|
-
|
2250
|
-
|
2274
|
+
Args:
|
2275
|
+
df (pd.DataFrame): The DataFrame containing word frequency data.
|
2276
|
+
col_topic (str): The name of the column representing topics.
|
2277
|
+
title (str, optional): The title for the plot. Defaults to "Richesse lexicale par topic".
|
2278
|
+
width (int, optional): The width of the plot. Defaults to 1200.
|
2279
|
+
height (int, optional): The height of the plot. Defaults to 1000.
|
2280
|
+
template (str, optional): The template of the plot. Defaults to "plotly".
|
2251
2281
|
|
2252
2282
|
Returns:
|
2253
|
-
|
2283
|
+
fig_richesse (go.Figure): The Plotly Figure object representing the lexical richness visualization per topic.
|
2254
2284
|
"""
|
2255
2285
|
fig_richesse = go.Figure()
|
2256
2286
|
for topic in list(df[col_topic].unique()):
|
@@ -2266,6 +2296,8 @@ def richesse_lexicale_per_topic(df: pd.DataFrame,
|
|
2266
2296
|
)
|
2267
2297
|
fig_richesse.update_layout(title=title,
|
2268
2298
|
xaxis_title="Rank",
|
2299
|
+
font_family="Inria Sans",
|
2300
|
+
font_size = font_size,
|
2269
2301
|
width=width,
|
2270
2302
|
height=height,
|
2271
2303
|
template=template)
|
@@ -2286,28 +2318,29 @@ def subplots_bar_per_day_per_cat(df: pd.DataFrame,
|
|
2286
2318
|
height: int = 700,
|
2287
2319
|
plot_bgcolor: str = None,
|
2288
2320
|
paper_bgcolor: str = None,
|
2289
|
-
template: str = "plotly"
|
2321
|
+
template: str = "plotly",
|
2322
|
+
font_size: int = 16) -> go.Figure:
|
2290
2323
|
"""
|
2291
2324
|
Creates subplots of stacked bar charts per day and category using Plotly.
|
2292
2325
|
|
2293
|
-
|
2294
|
-
|
2295
|
-
|
2296
|
-
|
2297
|
-
|
2298
|
-
|
2299
|
-
|
2300
|
-
|
2301
|
-
|
2302
|
-
|
2303
|
-
|
2304
|
-
|
2305
|
-
|
2306
|
-
|
2307
|
-
|
2326
|
+
Args:
|
2327
|
+
df (pd.DataFrame): The DataFrame containing the data.
|
2328
|
+
col_date (str): The name of the column representing dates.
|
2329
|
+
col_cat (str): The name of the column representing categories.
|
2330
|
+
metrics (List[str]): A list of column names representing metrics to be plotted.
|
2331
|
+
col_color (str): The name of the column representing colors for bars.
|
2332
|
+
y_axis_titles (List[str]): A list of titles for the y-axes of subplots.
|
2333
|
+
xaxis_title (str, optional): The title for the x-axis. Defaults to "Date".
|
2334
|
+
title_text (str, optional): The title for the entire plot. Defaults to "Trend - couverture & résonance".
|
2335
|
+
vertical_spacing (float, optional): The space between subplots. Defaults to 0.1.
|
2336
|
+
width (int, optional): The width of the entire plot. Defaults to 1500.
|
2337
|
+
height (int, optional): The height of each subplot. Defaults to 700.
|
2338
|
+
plot_bgcolor (str, optional): The background color for the plot area. Defaults to None.
|
2339
|
+
paper_bgcolor (str, optional): The background color for the paper area. Defaults to None.
|
2340
|
+
template (str, optional): The template of the plot. Defaults to "plotly".
|
2308
2341
|
|
2309
2342
|
Returns:
|
2310
|
-
|
2343
|
+
fig (go.Figure): The Plotly Figure object representing the subplots of stacked bar charts.
|
2311
2344
|
"""
|
2312
2345
|
fig = make_subplots(
|
2313
2346
|
rows = len(metrics), # number of rows
|
@@ -2357,7 +2390,8 @@ def subplots_bar_per_day_per_cat(df: pd.DataFrame,
|
|
2357
2390
|
yaxis_showgrid=False,
|
2358
2391
|
uniformtext_minsize=8,
|
2359
2392
|
uniformtext_mode='hide',
|
2360
|
-
font_family="
|
2393
|
+
font_family="Inria Sans",
|
2394
|
+
font_size=font_size,
|
2361
2395
|
template=template,
|
2362
2396
|
plot_bgcolor=plot_bgcolor, #background color (plot)
|
2363
2397
|
paper_bgcolor=paper_bgcolor, #background color (around plot)
|
@@ -2399,22 +2433,22 @@ def add_shape(fig: go.Figure,
|
|
2399
2433
|
"""
|
2400
2434
|
Adds a shape to a Plotly figure.
|
2401
2435
|
|
2402
|
-
|
2403
|
-
|
2404
|
-
|
2405
|
-
|
2406
|
-
|
2407
|
-
|
2408
|
-
|
2409
|
-
|
2410
|
-
|
2411
|
-
|
2412
|
-
|
2413
|
-
|
2414
|
-
|
2436
|
+
Args:
|
2437
|
+
fig (go.Figure): The Plotly Figure object.
|
2438
|
+
shape_type (str, optional): The type of shape to add. Defaults to "rect".
|
2439
|
+
x0 (float, optional): The x-coordinate of the lower left corner of the shape. Defaults to -1.
|
2440
|
+
y0 (float, optional): The y-coordinate of the lower left corner of the shape. Defaults to -1.
|
2441
|
+
x1 (float, optional): The x-coordinate of the upper right corner of the shape. Defaults to 0.
|
2442
|
+
y1 (float, optional): The y-coordinate of the upper right corner of the shape. Defaults to 0.
|
2443
|
+
fillcolor (str, optional): The fill color of the shape. Defaults to 'Silver'.
|
2444
|
+
opacity (float, optional): The opacity of the shape. Defaults to 0.1.
|
2445
|
+
line_width (float, optional): The width of the shape's outline. Defaults to 0.
|
2446
|
+
line_color (str, optional): The color of the shape's outline. Defaults to 'white'.
|
2447
|
+
dash (str, optional): The dash style of the shape's outline. Defaults to None.
|
2448
|
+
layer (str, optional): The layer on which the shape is added, either 'below' or 'above' the data. Defaults to "below".
|
2415
2449
|
|
2416
2450
|
Returns:
|
2417
|
-
|
2451
|
+
fig (go.Figure): The modified Plotly Figure object with the added shape.
|
2418
2452
|
"""
|
2419
2453
|
fig.add_shape(
|
2420
2454
|
# Shape for the area between (-1, 0)
|
@@ -2450,20 +2484,20 @@ def add_image(fig: go.Figure,
|
|
2450
2484
|
"""
|
2451
2485
|
Adds an image to a Plotly figure.
|
2452
2486
|
|
2453
|
-
|
2454
|
-
|
2455
|
-
|
2456
|
-
|
2457
|
-
|
2458
|
-
|
2459
|
-
|
2460
|
-
|
2461
|
-
|
2462
|
-
|
2463
|
-
|
2487
|
+
Args:
|
2488
|
+
fig (go.Figure): The Plotly Figure object.
|
2489
|
+
xref (str, optional): The x-coordinate reference point. Defaults to "paper".
|
2490
|
+
yref (str, optional): The y-coordinate reference point. Defaults to "paper".
|
2491
|
+
x (float, optional): The x-coordinate of the image position. Defaults to 0.
|
2492
|
+
y (float, optional): The y-coordinate of the image position. Defaults to 0.
|
2493
|
+
sizex (float, optional): The size of the image in the x-direction. Defaults to 0.08.
|
2494
|
+
sizey (float, optional): The size of the image in the y-direction. Defaults to 0.08.
|
2495
|
+
xanchor (str, optional): The x-coordinate anchor point. Defaults to "right".
|
2496
|
+
yanchor (str, optional): The y-coordinate anchor point. Defaults to "bottom".
|
2497
|
+
source (str, optional): The URL source of the image. Defaults to "https://www.example.com/image.jpg".
|
2464
2498
|
|
2465
2499
|
Returns:
|
2466
|
-
|
2500
|
+
fig (go.Figure): The modified Plotly Figure object with the added image.
|
2467
2501
|
"""
|
2468
2502
|
fig.add_layout_image(
|
2469
2503
|
dict(
|
@@ -2477,4 +2511,41 @@ def add_image(fig: go.Figure,
|
|
2477
2511
|
yanchor=yanchor
|
2478
2512
|
)
|
2479
2513
|
)
|
2514
|
+
return fig
|
2515
|
+
|
2516
|
+
def boxplot(df : pd.DataFrame, col_y : str = "degrees" , title : str ="Distribution of Node Degrees", yaxis_title : str = 'Degrees', width : int =1000, height: int =1000, plot_bgcolor: str = None, paper_bgcolor: str = None, template: str = "plotly", font_size : int = 16) -> go.Figure:
|
2517
|
+
"""
|
2518
|
+
Generates a box plot using Plotly Express with customization options.
|
2519
|
+
|
2520
|
+
Args:
|
2521
|
+
df (pd.DataFrame): The DataFrame containing the data to plot.
|
2522
|
+
col_y (str, optional): The column name in the DataFrame to plot on the y-axis. Default is "degrees".
|
2523
|
+
title (str, optional): The title of the plot. Default is "Distribution of Node Degrees".
|
2524
|
+
yaxis_title (str, optional): The label for the y-axis. Default is 'Degrees'.
|
2525
|
+
width (int, optional): The width of the plot in pixels. Default is 1000.
|
2526
|
+
height (int, optional): The height of the plot in pixels. Default is 1000.
|
2527
|
+
plot_bgcolor (str, optional): The background color of the plot area. Default is None.
|
2528
|
+
paper_bgcolor (str, optional): The background color of the paper (overall plot background). Default is None.
|
2529
|
+
template (str, optional): The template for the plot. Default is "plotly".
|
2530
|
+
font_size (int, optional): The font size for the plot text. Default is 16.
|
2531
|
+
|
2532
|
+
Returns:
|
2533
|
+
fig (go.Figure): The Plotly Figure object for the box plot.
|
2534
|
+
"""
|
2535
|
+
# Box plot using Plotly Express
|
2536
|
+
fig = px.box(df, y = col_y, title=title)
|
2537
|
+
|
2538
|
+
# Customize the plot (optional)
|
2539
|
+
fig.update_layout(
|
2540
|
+
yaxis_title = yaxis_title,
|
2541
|
+
xaxis_title='',
|
2542
|
+
showlegend=False,
|
2543
|
+
width=width,
|
2544
|
+
height=height,
|
2545
|
+
font_family="Inria Sans",
|
2546
|
+
font_size=font_size,
|
2547
|
+
template=template,
|
2548
|
+
plot_bgcolor=plot_bgcolor, #background color (plot)
|
2549
|
+
paper_bgcolor=paper_bgcolor
|
2550
|
+
)
|
2480
2551
|
return fig
|