opsci-toolbox 0.0.2__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opsci_toolbox/apis/rapidapi_helpers.py +82 -0
- opsci_toolbox/helpers/common.py +566 -191
- opsci_toolbox/helpers/cv.py +298 -123
- opsci_toolbox/helpers/dataviz.py +1005 -216
- opsci_toolbox/helpers/dates.py +55 -8
- opsci_toolbox/helpers/nlp.py +768 -110
- opsci_toolbox/helpers/nlp_cuml.py +280 -0
- opsci_toolbox/helpers/sna.py +101 -10
- opsci_toolbox/helpers/surreaction.py +156 -0
- {opsci_toolbox-0.0.2.dist-info → opsci_toolbox-0.0.6.dist-info}/METADATA +9 -11
- opsci_toolbox-0.0.6.dist-info/RECORD +21 -0
- opsci_toolbox-0.0.2.dist-info/RECORD +0 -19
- {opsci_toolbox-0.0.2.dist-info → opsci_toolbox-0.0.6.dist-info}/WHEEL +0 -0
- {opsci_toolbox-0.0.2.dist-info → opsci_toolbox-0.0.6.dist-info}/top_level.txt +0 -0
opsci_toolbox/helpers/dataviz.py
CHANGED
@@ -15,55 +15,115 @@ import math
|
|
15
15
|
import pandas as pd
|
16
16
|
from opsci_toolbox.helpers.nlp import sample_most_engaging_posts, create_frequency_table
|
17
17
|
from matplotlib.colors import to_hex
|
18
|
+
import networkx as nx
|
18
19
|
|
19
20
|
|
20
21
|
|
21
|
-
def upload_chart_studio(
|
22
|
+
def upload_chart_studio(
    username: str,
    api_key: str,
    fig,
    title: str
) -> tuple:
    """
    Publish a Plotly figure on Chart Studio and report where it lives.

    Args:
        username (str): The Chart Studio username.
        api_key (str): The Chart Studio API key.
        fig: The Plotly figure object to be uploaded.
        title (str): Used as the filename of the uploaded visualization.

    Returns:
        tuple: ``(url, embed_code)``. Each element stays an empty string if
        the corresponding step did not complete.
    """
    viz_url, embed_code = "", ""

    try:
        # Store the credentials so that the upload call below is authenticated
        tls.set_credentials_file(username=username, api_key=api_key)

        # Push the figure, then ask Chart Studio for its embed snippet
        viz_url = py.plot(fig, filename=title, auto_open=True)
        embed_code = tls.get_embed(viz_url)

        print("* URL DE LA VIZ >> ", viz_url)
        print("\n*CODE EMBED A COLLER \n", embed_code)

    except Exception as e:
        # Best effort: report the failure with a hint and fall through to the return
        print(e, "try to reduce the dataviz size by printing less data")

    return viz_url, embed_code
|
62
|
+
|
63
|
+
def scale_to_0_10(x: pd.Series) -> pd.Series:
    """
    Scale a pandas Series to the integer range [0, 10].

    The minimum of the Series maps to 0 and the maximum to 10; intermediate
    values are truncated towards zero by the integer cast.

    Args:
        x (pd.Series): The input pandas Series to be scaled.

    Returns:
        pd.Series: Integer Series with values in [0, 10], indexed like ``x``.
        A Series whose values are all identical (or an empty Series) is
        returned as all zeros.
    """
    lowest = x.min()
    span = x.max() - lowest

    # A constant Series has a zero span: the min-max formula would yield 0/0 = NaN
    # and the integer cast would raise, so map every value to the bottom of the scale.
    if len(x) == 0 or span == 0:
        return pd.Series(0, index=x.index, dtype=int)

    return ((x - lowest) / span * 10).astype(int)
|
44
74
|
|
45
|
-
def normalize_data_size(df, col:str, coef = 20, constant = 5):
|
75
|
+
def normalize_data_size(df: pd.DataFrame, col: str, coef: int = 20, constant: int = 5) -> pd.DataFrame:
    """
    Normalize the sizes of dots based on a specified column in a DataFrame.

    Each value ``v`` is mapped to ``((v - max) / (v + max) + 1) * coef + constant``,
    so the column maximum gets ``coef + constant`` and a zero value gets ``constant``.

    Args:
        df (pd.DataFrame): The input DataFrame. It is modified in place.
        col (str): The column name to be normalized.
        coef (int, optional): The coefficient to scale the normalized values. Defaults to 20.
        constant (int, optional): The constant to add to the scaled normalized values. Defaults to 5.

    Returns:
        pd.DataFrame: The same DataFrame with an additional ``'normalized_' + col`` column.
        Rows for which ``v + max`` is zero (e.g. an all-zero column) receive the
        minimum size ``constant`` instead of NaN or infinity.
    """
    values = df[col]
    peak = values.max()
    denominator = values + peak

    # Where the denominator is zero the ratio is undefined (0/0 or x/0); compute it
    # with a harmless placeholder and then pin those rows to the bottom of the scale.
    undefined = denominator == 0
    scaled = (values - peak) / denominator.mask(undefined, 1) + 1
    scaled = scaled.mask(undefined, 0)

    df['normalized_' + col] = scaled * coef + constant
    return df
|
51
90
|
|
52
|
-
def generate_color_palette(lst, transparency=1):
|
91
|
+
def generate_color_palette(lst: list, transparency: float = 1) -> dict:
    """
    Generate a random color palette of RGBA codes.

    Args:
        lst (List[str]): Items (e.g. category names) that each need a color.
        transparency (float, optional): Alpha value written into every RGBA code (0 to 1). Defaults to 1.

    Returns:
        dict: Mapping of each item of ``lst`` to an ``'rgba(r, g, b, a)'`` string
        whose channels are drawn uniformly from 0 to 255 inclusive.
    """
    def _channel() -> int:
        # randrange's upper bound is exclusive: 256 is required for 255 to be reachable
        return random.randrange(256)

    return {
        item: 'rgba({}, {}, {}, {})'.format(_channel(), _channel(), _channel(), transparency)
        for item in lst
    }
|
63
112
|
|
64
|
-
def generate_color_palette_with_colormap(lst, colormap = "viridis"):
|
113
|
+
def generate_color_palette_with_colormap(lst: list, colormap: str = "viridis") -> dict:
|
114
|
+
"""
|
115
|
+
Generate a color palette with hexadecimal codes using a specified colormap.
|
116
|
+
|
117
|
+
Args:
|
118
|
+
lst (List[str]): List of color names or identifiers.
|
119
|
+
colormap (str, optional): Name of the colormap to use. Defaults to "viridis".
|
120
|
+
|
121
|
+
Returns:
|
122
|
+
Dict[str, str]: Dictionary containing color names or identifiers as keys and corresponding hexadecimal codes as values.
|
123
|
+
"""
|
65
124
|
num_colors = len(lst)
|
66
|
-
|
125
|
+
|
126
|
+
# Generate example data
|
67
127
|
data = np.linspace(0, 1, num_colors)
|
68
128
|
|
69
129
|
# Choose the colormap
|
@@ -76,38 +136,69 @@ def generate_color_palette_with_colormap(lst, colormap = "viridis"):
|
|
76
136
|
colors = cmap(norm(data))
|
77
137
|
|
78
138
|
# Convert colors to hexadecimal codes
|
79
|
-
hex_colors = {item
|
139
|
+
hex_colors = {item: to_hex(colors[i]) for i, item in enumerate(lst)}
|
80
140
|
|
81
141
|
return hex_colors
|
82
142
|
|
83
|
-
def generate_hexadecimal_color_palette(lst, add_transparency=False, transparency=0.5):
|
143
|
+
def generate_hexadecimal_color_palette(lst: list, add_transparency: bool = False, transparency: float = 0.5) -> dict:
    """
    Generate a random color palette with hexadecimal codes and optional transparency.

    Args:
        lst (List[str]): Items (e.g. category names) that each need a color.
        add_transparency (bool, optional): Whether to append an alpha byte to the codes. Defaults to False.
        transparency (float, optional): Alpha value for the colors (0 to 1). Values outside
            this range are clamped. Defaults to 0.5.

    Returns:
        Dict[str, str]: Mapping of each item of ``lst`` to ``'#rrggbb'``, or
        ``'#rrggbbaa'`` when ``add_transparency`` is True.
    """
    alpha_suffix = ""
    if add_transparency:
        # Clamp first: an alpha above 1 would format to three hex digits and a
        # negative one to a minus sign, both of which are invalid color codes.
        clamped = min(max(transparency, 0), 1)
        alpha_suffix = "{:02x}".format(int(clamped * 255))  # 0-1 float -> 0-255 byte

    return {
        item: "#{:02x}{:02x}{:02x}".format(
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        ) + alpha_suffix
        for item in lst
    }
|
102
176
|
|
103
|
-
def generate_random_hexadecimal_color():
|
104
|
-
|
177
|
+
def generate_random_hexadecimal_color() -> str:
    """
    Draw a random color and return it as a hexadecimal code.

    Returns:
        str: A ``'#rrggbb'`` string with lowercase hexadecimal digits.
    """
    # Draw the channels in red, green, blue order, each in 0-255 inclusive
    red = random.randint(0, 255)
    green = random.randint(0, 255)
    blue = random.randint(0, 255)
    return f"#{red:02x}{green:02x}{blue:02x}"
|
189
|
+
|
190
|
+
def wrap_text(txt: str, length: int = 50) -> str:
    """
    Break a text into lines joined by HTML ``<br>`` tags.

    Args:
        txt (str): The text to wrap. It is converted with ``str()`` first.
        length (int, optional): The maximum length of each line. Defaults to 50.

    Returns:
        str: The wrapped text, with ``<br>`` between consecutive lines.
    """
    wrapped_lines = textwrap.wrap(str(txt), width=length)
    return '<br>'.join(wrapped_lines)
|
112
203
|
|
113
204
|
def get_convex_hull_coord(points: np.array, interpolate_curve: bool = True) -> tuple:
|
@@ -282,31 +373,45 @@ def get_convex_hull_coord(points: np.array, interpolate_curve: bool = True) -> t
|
|
282
373
|
|
283
374
|
# return fig
|
284
375
|
|
285
|
-
def create_scatter_plot(df, col_x, col_y, col_category, color_palette, col_color, col_size, col_text, title="Scatter Plot", x_axis_label="X-axis", y_axis_label="Y-axis", width=1000, height=1000, xaxis_range=None, yaxis_range=None,
|
286
|
-
size_value =4, opacity=0.8, maxdisplayed=0, mode = "markers", textposition="bottom center", plot_bgcolor=None, paper_bgcolor=None, yaxis_showgrid = False, xaxis_showgrid = False, color="indianred", line_width=0.5, line_color="white", colorscale='Viridis', showscale=True, template="plotly"):
|
376
|
+
def create_scatter_plot(df: pd.DataFrame, col_x: str, col_y: str, col_category: str, color_palette: dict, col_color: str, col_size: str, col_text: str, col_legend: list = [], title: str = "Scatter Plot", x_axis_label: str = "X-axis", y_axis_label: str = "Y-axis", width: int = 1000, height: int = 1000, xaxis_range: list =None, yaxis_range: list =None, size_value: int = 4, opacity: float = 0.8, maxdisplayed: int = 0, mode: str = "markers", textposition: str = "bottom center", plot_bgcolor: str = None, paper_bgcolor: str = None, yaxis_showgrid: bool = False, xaxis_showgrid: bool = False, color: str = "indianred", line_width: float = 0.5, line_color: str = "white", colorscale: str = 'Viridis', showscale: bool = True, template: str = "plotly") -> go.Figure:
|
287
377
|
"""
|
288
|
-
Create a scatter plot
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
378
|
+
Create a scatter plot.
|
379
|
+
|
380
|
+
Args:
|
381
|
+
df (pd.DataFrame): DataFrame containing all data.
|
382
|
+
col_x (str): Name of the column containing X values.
|
383
|
+
col_y (str): Name of the column containing Y values.
|
384
|
+
col_category (str): Name of the column for colorization.
|
385
|
+
color_palette (dict): A dictionary mapping category with color value.
|
386
|
+
col_color (str): Name of the column for color. Only used for continuous scale.
|
387
|
+
col_size (str): Name of the column for dot sizes.
|
388
|
+
col_text (str): Name of the column containing text for legend on hover.
|
389
|
+
col_legend (List[str], optional): List of column names for legend. Defaults to [].
|
390
|
+
title (str, optional): Graph title. Defaults to "Scatter Plot".
|
391
|
+
x_axis_label (str, optional): Label for X-axis. Defaults to "X-axis".
|
392
|
+
y_axis_label (str, optional): Label for Y-axis. Defaults to "Y-axis".
|
393
|
+
width (int, optional): Size of the graph. Defaults to 1000.
|
394
|
+
height (int, optional): Size of the graph. Defaults to 1000.
|
395
|
+
xaxis_range (list, optional): Range values for X-axis. Defaults to None.
|
396
|
+
yaxis_range (list, optional): Range values for Y-axis. Defaults to None.
|
397
|
+
size_value (int, optional): Minimum size (or constant) for dots. Defaults to 4.
|
398
|
+
opacity (float, optional): Dots transparency. Defaults to 0.8.
|
399
|
+
maxdisplayed (int, optional): Maximum number of dots to display. 0 = infinite. Defaults to 0.
|
400
|
+
mode (str, optional): Mode for the scatter plot. Defaults to "markers".
|
401
|
+
textposition (str, optional): Text position for hover. Defaults to "bottom center".
|
402
|
+
plot_bgcolor (str, optional): Background color for plot. Defaults to None.
|
403
|
+
paper_bgcolor (str, optional): Background color for the area around the plot. Defaults to None.
|
404
|
+
yaxis_showgrid (bool, optional): Whether to show grid on Y-axis. Defaults to False.
|
405
|
+
xaxis_showgrid (bool, optional): Whether to show grid on X-axis. Defaults to False.
|
406
|
+
color (str, optional): Color code for dots if col_category is None. Defaults to "indianred".
|
407
|
+
line_width (float, optional): Width of dots contours. Defaults to 0.5.
|
408
|
+
line_color (str, optional): Color of dots contours. Defaults to "white".
|
409
|
+
colorscale (str, optional): Color scale for continuous color mapping. Defaults to 'Viridis'.
|
410
|
+
showscale (bool, optional): Whether to show color scale. Defaults to True.
|
411
|
+
template (str, optional): Plotly template. Defaults to "plotly".
|
412
|
+
|
413
|
+
Returns:
|
414
|
+
go.Figure: Plotly scatter plot figure.
|
310
415
|
"""
|
311
416
|
|
312
417
|
if line_color is None :
|
@@ -327,8 +432,9 @@ def create_scatter_plot(df, col_x, col_y, col_category, color_palette, col_color
|
|
327
432
|
size = df[df[col_category] == category][col_size]
|
328
433
|
hovertemplate += '<br><b>'+col_size+'</b>:'+size.astype(str)
|
329
434
|
|
330
|
-
if
|
331
|
-
|
435
|
+
if len(col_legend)>0:
|
436
|
+
for c in col_legend:
|
437
|
+
hovertemplate +='<br><b>'+str(c)+'</b>:'+ df[df[col_category]==category][c].astype(str).apply(wrap_text)
|
332
438
|
|
333
439
|
fig.add_trace(
|
334
440
|
go.Scatter(
|
@@ -365,13 +471,16 @@ def create_scatter_plot(df, col_x, col_y, col_category, color_palette, col_color
|
|
365
471
|
else :
|
366
472
|
if color is None:
|
367
473
|
color = generate_random_hexadecimal_color()
|
368
|
-
if
|
369
|
-
|
474
|
+
if len(col_legend)>0:
|
475
|
+
for c in col_legend:
|
476
|
+
hovertemplate +='<br><b>'+str(c)+'</b>:'+ df[c].astype(str).apply(wrap_text)
|
370
477
|
|
371
478
|
fig = go.Figure( go.Scatter(
|
372
479
|
x=df[col_x],
|
373
480
|
y=df[col_y],
|
374
481
|
mode=mode,
|
482
|
+
text = df[col_text],
|
483
|
+
textposition=textposition,
|
375
484
|
marker=dict(color=color, #dots color
|
376
485
|
size=size, #dots size
|
377
486
|
opacity=opacity, #dots opacity
|
@@ -418,52 +527,83 @@ def create_scatter_plot(df, col_x, col_y, col_category, color_palette, col_color
|
|
418
527
|
)
|
419
528
|
return fig
|
420
529
|
|
421
|
-
def add_annotations(fig, df, col_x, col_y, col_txt, width=1000, label_size_ratio=100, bordercolor = "#C7C7C7", arrowcolor = "SlateGray", bgcolor ="#FFFFFF", font_color = "SlateGray"):
|
422
|
-
|
530
|
+
def add_annotations(fig: go.Figure, df: pd.DataFrame, col_x: str, col_y: str, col_txt: str, width: int = 1000, label_size_ratio: int = 100, bordercolor: str = "#C7C7C7", arrowcolor: str = "SlateGray", bgcolor: str = "#FFFFFF", font_color: str = "SlateGray") -> go.Figure:
    """
    Add one boxed, arrowed annotation per DataFrame row to a Plotly figure.

    Args:
        fig (go.Figure): Plotly figure object. Annotations are added to it in place.
        df (pd.DataFrame): DataFrame containing annotation data. It is not modified.
        col_x (str): Name of the column containing X values.
        col_y (str): Name of the column containing Y values.
        col_txt (str): Name of the column containing text for annotations.
            Missing values are rendered as empty labels.
        width (int, optional): Width of the figure; font size, border width and
            padding are derived from it. Defaults to 1000.
        label_size_ratio (int, optional): Font size is ``width / label_size_ratio``. Defaults to 100.
        bordercolor (str, optional): Color of annotation borders. Defaults to "#C7C7C7".
        arrowcolor (str, optional): Color of annotation arrows. Defaults to "SlateGray".
        bgcolor (str, optional): Background color of annotations. Defaults to "#FFFFFF".
        font_color (str, optional): Color of annotation text. Defaults to "SlateGray".

    Returns:
        go.Figure: The same figure object with annotations added.
    """
    # Clean the labels on a local Series: assigning back into df would silently
    # overwrite the caller's column (and warn when df is a slice of another frame).
    labels = df[col_txt].fillna("").astype(str)

    for x_value, y_value, label in zip(df[col_x], df[col_y], labels):
        fig.add_annotation(
            x=x_value,
            y=y_value,
            text='<b>' + wrap_text(label) + '</b>',
            showarrow=True,
            arrowhead=1,
            font=dict(
                family="Helvetica, Sans-serif",
                size=width / label_size_ratio,
                color=font_color,
            ),
            bordercolor=bordercolor,
            borderwidth=width / 1000,
            borderpad=width / 500,
            bgcolor=bgcolor,
            opacity=1,
            arrowcolor=arrowcolor,
        )

    return fig
|
443
573
|
|
444
|
-
def scatter3D(df, col_x, col_y, col_z, col_category, color_palette, col_size, col_text, title="3D Scatter Plot", x_axis_label="X-axis", y_axis_label="Y-axis", z_axis_label="Z-axis", width=1000, height=1000, xaxis_range=None, yaxis_range=None,
|
445
|
-
zaxis_range=None, size_value =4, opacity=0.8, plot_bgcolor=None, paper_bgcolor=None, color="indianred", line_width=0.5, line_color="white", template = "plotly"):
|
574
|
+
def scatter3D(df: pd.DataFrame, col_x: str, col_y: str, col_z: str, col_category: str, color_palette: dict, col_size: str, col_text: str, title: str = "3D Scatter Plot", x_axis_label: str = "X-axis", y_axis_label: str = "Y-axis", z_axis_label: str = "Z-axis", width: int = 1000, height: int = 1000, xaxis_range: list = None, yaxis_range: list = None, zaxis_range: list = None, size_value: int = 4, opacity: float = 0.8, plot_bgcolor: str = None, paper_bgcolor: str = None, color: str = "indianred", line_width: float = 0.5, line_color: str = "white", template: str = "plotly") -> go.Figure:
|
446
575
|
"""
|
447
|
-
Create a 3D scatter plot
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
576
|
+
Create a 3D scatter plot.
|
577
|
+
|
578
|
+
Args:
|
579
|
+
df (pd.DataFrame): DataFrame containing all data.
|
580
|
+
col_x (str): Name of the column containing X values.
|
581
|
+
col_y (str): Name of the column containing Y values.
|
582
|
+
col_z (str): Name of the column containing Z values.
|
583
|
+
col_category (str): Name of the column for colorization.
|
584
|
+
color_palette (dict): A dictionary mapping categories with color values.
|
585
|
+
col_size (str): Name of the column for dot sizes.
|
586
|
+
col_text (str): Name of the column containing text for legend on hover.
|
587
|
+
title (str, optional): Graph title. Defaults to "3D Scatter Plot".
|
588
|
+
x_axis_label (str, optional): Label for X-axis. Defaults to "X-axis".
|
589
|
+
y_axis_label (str, optional): Label for Y-axis. Defaults to "Y-axis".
|
590
|
+
z_axis_label (str, optional): Label for Z-axis. Defaults to "Z-axis".
|
591
|
+
width (int, optional): Width of the graph. Defaults to 1000.
|
592
|
+
height (int, optional): Height of the graph. Defaults to 1000.
|
593
|
+
xaxis_range (list, optional): Range values for the X-axis. Defaults to None.
|
594
|
+
yaxis_range (list, optional): Range values for the Y-axis. Defaults to None.
|
595
|
+
zaxis_range (list, optional): Range values for the Z-axis. Defaults to None.
|
596
|
+
size_value (int, optional): Minimum size (or constant) for dots. Defaults to 4.
|
597
|
+
opacity (float, optional): Dots transparency. Defaults to 0.8.
|
598
|
+
plot_bgcolor (str, optional): Background color for the plot. Defaults to None.
|
599
|
+
paper_bgcolor (str, optional): Background color for the area around the plot. Defaults to None.
|
600
|
+
color (str, optional): Color code for dots if col_category is None. Defaults to "indianred".
|
601
|
+
line_width (float, optional): Width of dots contours. Defaults to 0.5.
|
602
|
+
line_color (str, optional): Color of dots contours. Defaults to "white".
|
603
|
+
template (str, optional): Plotly template. Defaults to "plotly".
|
604
|
+
|
605
|
+
Returns:
|
606
|
+
go.Figure: Plotly figure object.
|
467
607
|
"""
|
468
608
|
fig=go.Figure()
|
469
609
|
if col_category is not None:
|
@@ -582,57 +722,71 @@ def scatter3D(df, col_x, col_y, col_z, col_category, color_palette, col_size, co
|
|
582
722
|
|
583
723
|
return fig
|
584
724
|
|
585
|
-
|
725
|
+
|
726
|
+
def fig_bar_trend(df: pd.DataFrame, col_x: str, bar_measure: str, trend_measure: str, x_name: str = "X", bar_name: str = "metric1", trend_name: str = "metric2", marker_color: str = 'lightpink', line_color: str = 'indianred', title_text: str = "Couverture & Résonance", width: int = 1500, height: int = 700, xaxis_tickangle: int = 0, opacity: float = 0.8, plot_bgcolor: str = None, paper_bgcolor: str = None, template: str = "plotly") -> go.Figure:
|
586
727
|
"""
|
587
|
-
Display a graph that
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
728
|
+
Display a graph that combines bar and trend chart to compare 2 metrics.
|
729
|
+
|
730
|
+
Args:
|
731
|
+
df (pd.DataFrame): DataFrame containing all data.
|
732
|
+
col_x (str): Name of the column containing X values.
|
733
|
+
bar_measure (str): Data represented as bar diagram.
|
734
|
+
trend_measure (str): Data represented as trend line.
|
735
|
+
x_name (str, optional): Label for X-axis. Defaults to "X".
|
736
|
+
bar_name (str, optional): Label for the bar measure. Defaults to "metric1".
|
737
|
+
trend_name (str, optional): Label for the trend measure. Defaults to "metric2".
|
738
|
+
marker_color (str, optional): Color code for bars. Defaults to 'lightpink'.
|
739
|
+
line_color (str, optional): Color code for trend line. Defaults to 'indianred'.
|
740
|
+
title_text (str, optional): Graph title. Defaults to "Couverture & Résonance".
|
741
|
+
width (int, optional): Width of the graph. Defaults to 1500.
|
742
|
+
height (int, optional): Height of the graph. Defaults to 700.
|
743
|
+
xaxis_tickangle (int, optional): Angle for x ticks. Defaults to 0.
|
744
|
+
opacity (float, optional): Opacity of bars. Defaults to 0.8.
|
745
|
+
plot_bgcolor (str, optional): Background color for the plot. Defaults to None.
|
746
|
+
paper_bgcolor (str, optional): Background color for the area around the plot. Defaults to None.
|
747
|
+
template (str, optional): Plotly template. Defaults to "plotly".
|
748
|
+
|
749
|
+
Returns:
|
750
|
+
go.Figure: Plotly figure object.
|
598
751
|
"""
|
599
752
|
|
600
|
-
nk = np.empty(shape=(len(x), 3, 1), dtype="object")
|
601
|
-
nk[:, 0] = np.array(x.apply(lambda txt: '<br>'.join(textwrap.wrap(str(txt), width=50)))).reshape(-1, 1)
|
602
|
-
nk[:, 1] = np.array(bar_measure).reshape(-1, 1)
|
603
|
-
nk[:, 2] = np.array(trend_measure).reshape(-1, 1)
|
753
|
+
# nk = np.empty(shape=(len(x), 3, 1), dtype="object")
|
754
|
+
# nk[:, 0] = np.array(x.apply(lambda txt: '<br>'.join(textwrap.wrap(str(txt), width=50)))).reshape(-1, 1)
|
755
|
+
# nk[:, 1] = np.array(bar_measure).reshape(-1, 1)
|
756
|
+
# nk[:, 2] = np.array(trend_measure).reshape(-1, 1)
|
604
757
|
|
605
758
|
fig = make_subplots(specs=[[{"secondary_y": True}]])
|
606
759
|
|
607
760
|
fig.add_trace(
|
608
761
|
go.Scatter(
|
609
|
-
x=
|
610
|
-
y=trend_measure,
|
762
|
+
x=df[col_x].apply(wrap_text),
|
763
|
+
y=df[trend_measure],
|
611
764
|
name=trend_name,
|
612
765
|
mode='lines',
|
613
766
|
line_color=line_color,
|
614
767
|
line_width=4,
|
615
768
|
textfont=dict(size=8),
|
616
|
-
customdata=nk,
|
617
|
-
hovertemplate=("<br>"+x_name+"
|
769
|
+
# customdata=nk,
|
770
|
+
hovertemplate=("<br>"+x_name+" :"+df[col_x].astype(str)+"<br>"+bar_name+" - "+df[bar_measure].astype(str)+"<br>"+trend_name+" : "+df[trend_measure].astype(str)+"<extra></extra>"),
|
618
771
|
),
|
619
772
|
secondary_y=True,
|
620
773
|
)
|
621
774
|
# Add traces
|
622
775
|
fig.add_trace(
|
623
776
|
go.Bar(
|
624
|
-
x=
|
625
|
-
y = bar_measure,
|
777
|
+
x=df[col_x].apply(wrap_text),
|
778
|
+
y = df[bar_measure],
|
626
779
|
name=bar_name,
|
627
780
|
marker_color=marker_color,
|
628
781
|
opacity=opacity,
|
629
|
-
|
782
|
+
# customdata=nk,
|
783
|
+
hovertemplate=("<br>"+x_name+" :"+df[col_x].astype(str)+"<br>"+bar_name+" - "+df[bar_measure].astype(str)+"<br>"+trend_name+" : "+df[trend_measure].astype(str)+"<extra></extra>"),
|
630
784
|
),
|
631
785
|
secondary_y=False,
|
632
786
|
|
633
787
|
)
|
634
|
-
first_axis_range=[-0.5,bar_measure.max()*1.01]
|
635
|
-
secondary_axis_range=[-0.5,trend_measure.max()*1.01]
|
788
|
+
first_axis_range=[-0.5,df[bar_measure].max()*1.01]
|
789
|
+
secondary_axis_range=[-0.5,df[trend_measure].max()*1.01]
|
636
790
|
|
637
791
|
# Add figure title
|
638
792
|
fig.update_layout(
|
@@ -668,54 +822,150 @@ def fig_bar_trend(x, bar_measure, trend_measure, x_name="X", bar_name ="metric1"
|
|
668
822
|
return fig
|
669
823
|
|
670
824
|
|
671
|
-
def
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
825
|
+
# def fig_bar_trend(x, bar_measure, trend_measure, x_name="X", bar_name ="metric1", trend_name = "metric2", marker_color='lightpink', line_color='indianred', title_text="Couverture & Résonance", width=1500, height=700, xaxis_tickangle=0, opacity=0.8, plot_bgcolor=None, paper_bgcolor=None, template = "plotly"):
|
826
|
+
# """
|
827
|
+
# Display a graph that combine bar and trend chart to compare 2 metrics :
|
828
|
+
# - x = x axis data
|
829
|
+
# - bar_measure = data represented as bar diagram
|
830
|
+
# - trend_measure = data represented as trend line
|
831
|
+
# - x_name / bar_name / trend_name : axis labels
|
832
|
+
# - marker_color = color code for bars
|
833
|
+
# - line_color = color code for trend line
|
834
|
+
# - title_text = graph title
|
835
|
+
# - width / height = size of plot
|
836
|
+
# - xaxis_tickangle = angle for x ticks
|
837
|
+
# - opacity = opacity of bars
|
838
|
+
# """
|
839
|
+
|
840
|
+
# nk = np.empty(shape=(len(x), 3, 1), dtype="object")
|
841
|
+
# nk[:, 0] = np.array(x.apply(lambda txt: '<br>'.join(textwrap.wrap(str(txt), width=50)))).reshape(-1, 1)
|
842
|
+
# nk[:, 1] = np.array(bar_measure).reshape(-1, 1)
|
843
|
+
# nk[:, 2] = np.array(trend_measure).reshape(-1, 1)
|
844
|
+
|
845
|
+
# fig = make_subplots(specs=[[{"secondary_y": True}]])
|
846
|
+
|
847
|
+
# fig.add_trace(
|
848
|
+
# go.Scatter(
|
849
|
+
# x=x,
|
850
|
+
# y=trend_measure,
|
851
|
+
# name=trend_name,
|
852
|
+
# mode='lines',
|
853
|
+
# line_color=line_color,
|
854
|
+
# line_width=4,
|
855
|
+
# textfont=dict(size=8),
|
856
|
+
# customdata=nk,
|
857
|
+
# hovertemplate=("<br>"+x_name+" :%{customdata[0]}<br>"+bar_name+" - %{customdata[1]}<br>"+trend_name+":%{customdata[2]}"+"<extra></extra>"),
|
858
|
+
# ),
|
859
|
+
# secondary_y=True,
|
860
|
+
# )
|
861
|
+
# # Add traces
|
862
|
+
# fig.add_trace(
|
863
|
+
# go.Bar(
|
864
|
+
# x=x,
|
865
|
+
# y = bar_measure,
|
866
|
+
# name=bar_name,
|
867
|
+
# marker_color=marker_color,
|
868
|
+
# opacity=opacity,
|
869
|
+
# hovertemplate=("<br>"+x_name+" :%{customdata[0]}<br>"+bar_name+" - %{customdata[1]}<br>"+trend_name+":%{customdata[2]}"+"<extra></extra>"),
|
870
|
+
# ),
|
871
|
+
# secondary_y=False,
|
872
|
+
|
873
|
+
# )
|
874
|
+
# first_axis_range=[-0.5,bar_measure.max()*1.01]
|
875
|
+
# secondary_axis_range=[-0.5,trend_measure.max()*1.01]
|
876
|
+
|
877
|
+
# # Add figure title
|
878
|
+
# fig.update_layout(
|
879
|
+
|
880
|
+
# title_text=title_text,
|
881
|
+
# showlegend=True,
|
882
|
+
# width = width,
|
883
|
+
# height= height,
|
884
|
+
# xaxis_tickangle=xaxis_tickangle,
|
885
|
+
# xaxis_showline=False,
|
886
|
+
# xaxis_showgrid=False,
|
887
|
+
# yaxis_showline=False,
|
888
|
+
# yaxis_showgrid=False,
|
889
|
+
# font_family="Segoe UI Semibold",
|
890
|
+
# template=template,
|
891
|
+
# plot_bgcolor=plot_bgcolor, #background color (plot)
|
892
|
+
# paper_bgcolor=paper_bgcolor, #background color (around plot)
|
893
|
+
# margin=dict(
|
894
|
+
# t=width / 15,
|
895
|
+
# b=width / 20,
|
896
|
+
# r=width / 20,
|
897
|
+
# l=width / 20,
|
898
|
+
# ),
|
899
|
+
# )
|
900
|
+
|
901
|
+
# # # Set x-axis title
|
902
|
+
# fig.update_xaxes(title_text=x_name)
|
903
|
+
|
904
|
+
# # Set y-axes titles
|
905
|
+
# fig.update_yaxes(title_text=bar_name, range = first_axis_range, secondary_y=False)
|
906
|
+
# fig.update_yaxes(title_text=trend_name, range = secondary_axis_range, secondary_y=True)
|
907
|
+
|
908
|
+
# return fig
|
909
|
+
|
910
|
+
|
911
|
+
def density_map(df_posts: pd.DataFrame,
|
912
|
+
df_dots: pd.DataFrame,
|
913
|
+
df_topics: pd.DataFrame,
|
914
|
+
col_topic: str,
|
915
|
+
col_engagement: str,
|
916
|
+
col_text: str,
|
917
|
+
col_text_dots: str,
|
918
|
+
colorscale: str = "Portland",
|
919
|
+
marker_color: str = "#ff7f0e",
|
920
|
+
arrow_color: str = "#ff7f0e",
|
921
|
+
width: int = 1000,
|
922
|
+
height: int = 1000,
|
923
|
+
show_text: bool = True,
|
924
|
+
show_topics: bool = True,
|
925
|
+
show_halo: bool = False,
|
926
|
+
show_histogram: bool = True,
|
927
|
+
label_size_ratio: int = 100,
|
928
|
+
n_words: int = 3,
|
929
|
+
title_text: str = "Clustering",
|
930
|
+
max_dots_displayed: int = 0,
|
931
|
+
max_topics_displayed: int = 20,
|
932
|
+
opacity: float = 0.3,
|
933
|
+
plot_bgcolor: str = None,
|
934
|
+
paper_bgcolor: str = None,
|
935
|
+
template: str = "plotly") -> go.Figure:
|
936
|
+
"""
|
937
|
+
Display a 2D histogram with contours and scattered dots.
|
938
|
+
|
939
|
+
Args:
|
940
|
+
df_posts (pd.DataFrame): DataFrame containing all data points to plot (corresponding to contours).
|
941
|
+
df_dots (pd.DataFrame): DataFrame containing a sample of points to plot as dots.
|
942
|
+
df_topics (pd.DataFrame): DataFrame containing topics representations.
|
943
|
+
col_topic (str): Column name corresponding to category.
|
944
|
+
col_engagement (str): Column name corresponding to a metric.
|
945
|
+
col_text (str): Column name corresponding to a text separated by |.
|
946
|
+
col_text_dots (str): Column name corresponding to the text for dots.
|
947
|
+
colorscale (str, optional): Possible values are https://plotly.com/python/builtin-colorscales/. Defaults to "Portland".
|
948
|
+
marker_color (str, optional): Dots color value. Defaults to "#ff7f0e".
|
949
|
+
arrow_color (str, optional): Arrow pointing to topic centroid color value. Defaults to "#ff7f0e".
|
950
|
+
width (int, optional): Width of the plot. Defaults to 1000.
|
951
|
+
height (int, optional): Height of the plot. Defaults to 1000.
|
952
|
+
show_text (bool, optional): Show dots. Defaults to True.
|
953
|
+
show_topics (bool, optional): Show topics labels. Defaults to True.
|
954
|
+
show_halo (bool, optional): Show circles around topics. Defaults to False.
|
955
|
+
show_histogram (bool, optional): Show 2D histogram with contours. Defaults to True.
|
956
|
+
label_size_ratio (int, optional): Influence the size of the topics labels. Higher value means smaller topics labels. Defaults to 100.
|
957
|
+
n_words (int, optional): Number of words to display. Defaults to 3.
|
958
|
+
title_text (str, optional): Graph title. Defaults to "Clustering".
|
959
|
+
max_dots_displayed (int, optional): Number of dots to display. Defaults to 0.
|
960
|
+
max_topics_displayed (int, optional): Number of topics to display. Defaults to 20.
|
961
|
+
opacity (float, optional): Opacity of dots. Defaults to 0.3.
|
962
|
+
plot_bgcolor (str, optional): Background color for the plot. Defaults to None.
|
963
|
+
paper_bgcolor (str, optional): Background color for the area around the plot. Defaults to None.
|
964
|
+
template (str, optional): Plotly template. Defaults to "plotly".
|
965
|
+
|
966
|
+
Returns:
|
967
|
+
go.Figure: Plotly figure object.
|
696
968
|
"""
|
697
|
-
Display a 2Dhistogram with contours :
|
698
|
-
- df_posts : dataframe containing all data points to plot (corresponding to contours)
|
699
|
-
- df_dots : dataframe containing a sample of points to plot as dots
|
700
|
-
- df_topics : dataframe containing topics representations
|
701
|
-
- col_topic : column name corresponding to category
|
702
|
-
- col_engagement : column name corresponding to a metric
|
703
|
-
- col_text : column name corresponding to a text separated by |
|
704
|
-
- colorscale : possible values are https://plotly.com/python/builtin-colorscales/
|
705
|
-
- marker_color : dots color value
|
706
|
-
- arrow_color : arrow pointing to topic centroid color value
|
707
|
-
- width / height = size of plot
|
708
|
-
- show_text : show dots
|
709
|
-
- show_topic : show topics labels
|
710
|
-
- show_halo : show circles around topics
|
711
|
-
- show_histogram : show 2Dhistogram with contours
|
712
|
-
- label_size_ratio : influence the size of the topics labels, higher value means smaller topics labels
|
713
|
-
- n_words : number of words to display (words should be separated by | in col_text)
|
714
|
-
- title_text = graph title
|
715
|
-
- max_dots_displayed : number of dots to display,
|
716
|
-
- max_topics_displayed : number of topics to display
|
717
|
-
- opacity : opacity of dots
|
718
|
-
"""
|
719
969
|
|
720
970
|
# df_topics = df_distrib_sample.copy()
|
721
971
|
df_topics= df_topics.dropna(subset=col_text)
|
@@ -834,9 +1084,25 @@ def density_map(df_posts,
|
|
834
1084
|
|
835
1085
|
|
836
1086
|
|
837
|
-
def topic_heatmap(df
|
1087
|
+
def topic_heatmap(df: pd.DataFrame,
|
1088
|
+
col_x: str = "topic_x",
|
1089
|
+
col_y: str = "topic_y",
|
1090
|
+
col_topic: str = "soft_topic",
|
1091
|
+
color_continuous_scale: str = 'GnBu',
|
1092
|
+
title: str = "Similarity between topics") -> go.Figure:
|
838
1093
|
"""
|
839
|
-
|
1094
|
+
Display a heatmap representing the similarity between topics.
|
1095
|
+
|
1096
|
+
Args:
|
1097
|
+
df (pd.DataFrame): DataFrame containing the topic data.
|
1098
|
+
col_x (str, optional): Column name for x-axis coordinates. Defaults to "topic_x".
|
1099
|
+
col_y (str, optional): Column name for y-axis coordinates. Defaults to "topic_y".
|
1100
|
+
col_topic (str, optional): Column name for the topic labels. Defaults to "soft_topic".
|
1101
|
+
color_continuous_scale (str, optional): Plotly color scale. Defaults to 'GnBu'.
|
1102
|
+
title (str, optional): Title of the heatmap. Defaults to "Similarity between topics".
|
1103
|
+
|
1104
|
+
Returns:
|
1105
|
+
go.Figure: Plotly figure object representing the heatmap.
|
840
1106
|
"""
|
841
1107
|
|
842
1108
|
distance_matrix = cosine_similarity(np.array(df[[col_x,col_y]]))
|
@@ -871,7 +1137,34 @@ def topic_heatmap(df, col_x = "topic_x", col_y = "topic_y", col_topic = "soft_to
|
|
871
1137
|
fig.update_layout(legend_title_text='Trend')
|
872
1138
|
return fig
|
873
1139
|
|
874
|
-
def generate_wordcloud(df
|
1140
|
+
def generate_wordcloud(df: pd.DataFrame,
|
1141
|
+
col_word: str,
|
1142
|
+
col_metric: str,
|
1143
|
+
width: int = 3000,
|
1144
|
+
height: int = 1500,
|
1145
|
+
dpi: int = 300,
|
1146
|
+
background_color: str = 'white',
|
1147
|
+
font_path: str = "font/SEGUIEMJ.TTF",
|
1148
|
+
colormap: str = "Viridis",
|
1149
|
+
show: bool = False) -> WordCloud:
|
1150
|
+
"""
|
1151
|
+
Generate a word cloud from a DataFrame.
|
1152
|
+
|
1153
|
+
Args:
|
1154
|
+
df (pd.DataFrame): DataFrame containing word frequency data.
|
1155
|
+
col_word (str): Column name containing words.
|
1156
|
+
col_metric (str): Column name containing frequency metrics for each word.
|
1157
|
+
width (int, optional): Width of the word cloud image. Defaults to 3000.
|
1158
|
+
height (int, optional): Height of the word cloud image. Defaults to 1500.
|
1159
|
+
dpi (int, optional): Dots per inch for image resolution. Defaults to 300.
|
1160
|
+
background_color (str, optional): Background color of the word cloud image. Defaults to 'white'.
|
1161
|
+
font_path (str, optional): Path to the font file to be used in the word cloud. Defaults to "font/SEGUIEMJ.TTF".
|
1162
|
+
colormap (str, optional): Colormap for the word cloud image. Defaults to "Viridis".
|
1163
|
+
show (bool, optional): Whether to display the word cloud image. Defaults to False.
|
1164
|
+
|
1165
|
+
Returns:
|
1166
|
+
WordCloud: WordCloud object representing the generated word cloud.
|
1167
|
+
"""
|
875
1168
|
|
876
1169
|
top_n_words={row[col_word]:row[col_metric] for i,row in df.iterrows()}
|
877
1170
|
|
@@ -882,12 +1175,36 @@ def generate_wordcloud(df, col_word, col_metric, width=3000, height=1500, dpi=30
|
|
882
1175
|
plt.imshow(wordcloud, interpolation='bilinear')
|
883
1176
|
plt.axis('off')
|
884
1177
|
plt.show()
|
885
|
-
|
886
1178
|
return wordcloud
|
887
1179
|
|
1180
|
+
def create_radar(df: pd.DataFrame,
|
1181
|
+
col_topic: str,
|
1182
|
+
col_metrics: list,
|
1183
|
+
title: str = "Radar",
|
1184
|
+
opacity: float = 0.6,
|
1185
|
+
width: int = 1000,
|
1186
|
+
height: int = 1000,
|
1187
|
+
template: str = "ggplot2",
|
1188
|
+
plot_bgcolor: str = None,
|
1189
|
+
paper_bgcolor: str = None) -> go.Figure:
|
1190
|
+
"""
|
1191
|
+
Create a radar chart.
|
888
1192
|
|
1193
|
+
Args:
|
1194
|
+
df (pd.DataFrame): DataFrame containing data for radar chart.
|
1195
|
+
col_topic (str): Column name containing topics.
|
1196
|
+
col_metrics (List[str]): List of column names containing metric values.
|
1197
|
+
title (str, optional): Title of the radar chart. Defaults to "Radar".
|
1198
|
+
opacity (float, optional): Opacity of radar area. Defaults to 0.6.
|
1199
|
+
width (int, optional): Width of the radar chart. Defaults to 1000.
|
1200
|
+
height (int, optional): Height of the radar chart. Defaults to 1000.
|
1201
|
+
template (str, optional): Plotly template to use. Defaults to "ggplot2".
|
1202
|
+
plot_bgcolor (Optional[str], optional): Background color of the plot. Defaults to None.
|
1203
|
+
paper_bgcolor (Optional[str], optional): Background color of the paper. Defaults to None.
|
889
1204
|
|
890
|
-
|
1205
|
+
Returns:
|
1206
|
+
go.Figure: Plotly Figure object representing the radar chart.
|
1207
|
+
"""
|
891
1208
|
|
892
1209
|
df = df[[col_topic] + col_metrics]
|
893
1210
|
col_metrics.append(col_metrics[0])
|
@@ -934,12 +1251,54 @@ def create_radar(df, col_topic, col_metrics, title="Radar", opacity=0.6, width =
|
|
934
1251
|
template=template,
|
935
1252
|
margin=dict(l=100, r=100, t=100, b=100)
|
936
1253
|
)
|
1254
|
+
return fig
|
937
1255
|
|
1256
|
+
def bar_subplots(df: pd.DataFrame,
|
1257
|
+
col_x: str,
|
1258
|
+
col_y: str,
|
1259
|
+
col_cat: str,
|
1260
|
+
color_palette: dict = None,
|
1261
|
+
n_cols: int = 4,
|
1262
|
+
n_top_words: int = 20,
|
1263
|
+
horizontal_spacing: float = 0.2,
|
1264
|
+
vertical_spacing: float = 0.08,
|
1265
|
+
textposition: str = None,
|
1266
|
+
color: str = None,
|
1267
|
+
title: str = "Top words per topic",
|
1268
|
+
template: str = "plotly",
|
1269
|
+
bargap: float = 0.4,
|
1270
|
+
width: int = 500,
|
1271
|
+
height: int = 35,
|
1272
|
+
plot_bgcolor: str = None,
|
1273
|
+
paper_bgcolor: str = None,
|
1274
|
+
showlegend: bool = True) -> go.Figure:
|
1275
|
+
"""
|
1276
|
+
Create subplots of horizontal bar charts.
|
938
1277
|
|
939
|
-
|
1278
|
+
Args:
|
1279
|
+
df (pd.DataFrame): DataFrame containing data for bar charts.
|
1280
|
+
col_x (str): Name of the column containing x-axis values.
|
1281
|
+
col_y (str): Name of the column containing y-axis values.
|
1282
|
+
col_cat (str): Name of the column containing categories.
|
1283
|
+
color_palette (Optional[Dict[str, str]], optional): Dictionary mapping categories to colors. Defaults to None.
|
1284
|
+
n_cols (int, optional): Number of columns in the subplot grid. Defaults to 4.
|
1285
|
+
n_top_words (int, optional): Number of top words to display in each bar chart. Defaults to 20.
|
1286
|
+
horizontal_spacing (float, optional): Spacing between subplots horizontally. Defaults to 0.2.
|
1287
|
+
vertical_spacing (float, optional): Spacing between subplots vertically. Defaults to 0.08.
|
1288
|
+
textposition (Optional[str], optional): Position of the text relative to the bars ('inside', 'outside', or None). Defaults to None.
|
1289
|
+
color (Optional[str], optional): Color of the bars. Defaults to None.
|
1290
|
+
title (str, optional): Title of the subplot. Defaults to "Top words per topic".
|
1291
|
+
template (str, optional): Plotly template to use. Defaults to "plotly".
|
1292
|
+
bargap (float, optional): Space between bars in the same cluster. Defaults to 0.4.
|
1293
|
+
width (int, optional): Width of each subplot. Defaults to 500.
|
1294
|
+
height (int, optional): Height of each bar in the subplot. Defaults to 35.
|
1295
|
+
plot_bgcolor (Optional[str], optional): Background color of the plot. Defaults to None.
|
1296
|
+
paper_bgcolor (Optional[str], optional): Background color of the paper. Defaults to None.
|
1297
|
+
showlegend (bool, optional): Whether to display the legend. Defaults to True.
|
940
1298
|
|
941
|
-
|
942
|
-
|
1299
|
+
Returns:
|
1300
|
+
go.Figure: Plotly Figure object representing the subplots of horizontal bar charts.
|
1301
|
+
"""
|
943
1302
|
categories = df[col_cat].unique()
|
944
1303
|
|
945
1304
|
# the user defines the number of columns; we compute the number of rows required
|
@@ -947,16 +1306,16 @@ def bar_subplots(df, col_x, col_y, col_cat, color_palette, n_cols=4, n_top_words
|
|
947
1306
|
|
948
1307
|
# fine tune parameter according to the text position provided
|
949
1308
|
if textposition == 'inside':
|
950
|
-
horizontal_spacing = (horizontal_spacing /
|
1309
|
+
horizontal_spacing = (horizontal_spacing / n_cols)/2
|
951
1310
|
else:
|
952
|
-
horizontal_spacing = (horizontal_spacing /
|
1311
|
+
horizontal_spacing = (horizontal_spacing / n_cols)
|
953
1312
|
|
954
1313
|
# create subplots
|
955
1314
|
fig = make_subplots(
|
956
1315
|
rows = n_rows, # number of rows
|
957
1316
|
cols = n_cols, # number of columns
|
958
1317
|
subplot_titles = list(categories), # title for each subplot
|
959
|
-
vertical_spacing = vertical_spacing /
|
1318
|
+
vertical_spacing = vertical_spacing / n_rows, # space between subplots
|
960
1319
|
horizontal_spacing = horizontal_spacing # space between subplots
|
961
1320
|
)
|
962
1321
|
|
@@ -1034,14 +1393,48 @@ def bar_subplots(df, col_x, col_y, col_cat, color_palette, n_cols=4, n_top_words
|
|
1034
1393
|
)
|
1035
1394
|
return fig
|
1036
1395
|
|
1037
|
-
def pie_subplots(df
|
1038
|
-
|
1396
|
+
def pie_subplots(df: pd.DataFrame,
|
1397
|
+
col_x: str,
|
1398
|
+
col_y: str,
|
1399
|
+
col_cat: str,
|
1400
|
+
col_color: str,
|
1401
|
+
n_cols: int = 4,
|
1402
|
+
horizontal_spacing: float = 0.2,
|
1403
|
+
vertical_spacing: float = 0.08,
|
1404
|
+
title: str = "Top words per topic",
|
1405
|
+
template: str = "plotly",
|
1406
|
+
width: int = 500,
|
1407
|
+
height: int = 150,
|
1408
|
+
plot_bgcolor: str = None,
|
1409
|
+
paper_bgcolor: str = None,
|
1410
|
+
showlegend: bool = True) -> go.Figure:
|
1411
|
+
"""
|
1412
|
+
Create subplots of pie charts.
|
1413
|
+
|
1414
|
+
Args:
|
1415
|
+
df (pd.DataFrame): DataFrame containing data for pie charts.
|
1416
|
+
col_x (str): Name of the column containing labels.
|
1417
|
+
col_y (str): Name of the column containing values.
|
1418
|
+
col_cat (str): Name of the column containing categories.
|
1419
|
+
col_color (str): Name of the column containing colors.
|
1420
|
+
n_cols (int, optional): Number of columns in the subplot grid. Defaults to 4.
|
1421
|
+
horizontal_spacing (float, optional): Spacing between subplots horizontally. Defaults to 0.2.
|
1422
|
+
vertical_spacing (float, optional): Spacing between subplots vertically. Defaults to 0.08.
|
1423
|
+
title (str, optional): Title of the subplot. Defaults to "Top words per topic".
|
1424
|
+
template (str, optional): Plotly template to use. Defaults to "plotly".
|
1425
|
+
width (int, optional): Width of each subplot. Defaults to 500.
|
1426
|
+
height (int, optional): Height of each subplot. Defaults to 150.
|
1427
|
+
plot_bgcolor (Optional[str], optional): Background color of the plot. Defaults to None.
|
1428
|
+
paper_bgcolor (Optional[str], optional): Background color of the paper. Defaults to None.
|
1429
|
+
showlegend (bool, optional): Whether to display the legend. Defaults to True.
|
1430
|
+
|
1431
|
+
Returns:
|
1432
|
+
go.Figure: Plotly Figure object representing the subplots of pie charts.
|
1433
|
+
"""
|
1039
1434
|
categories = df[col_cat].unique()
|
1040
1435
|
|
1041
1436
|
# the user defines the number of columns; we compute the number of rows required
|
1042
1437
|
n_rows = math.ceil(len(categories) / n_cols)
|
1043
|
-
|
1044
|
-
horizontal_spacing = (horizontal_spacing / n_rows)
|
1045
1438
|
|
1046
1439
|
specs = [[{'type':'domain'}] * n_cols] * n_rows
|
1047
1440
|
# create subplots
|
@@ -1049,8 +1442,8 @@ def pie_subplots(df, col_x, col_y, col_cat, col_color, n_cols=4, horizontal_spac
|
|
1049
1442
|
rows=n_rows,
|
1050
1443
|
cols=n_cols,
|
1051
1444
|
subplot_titles=list(categories),
|
1052
|
-
horizontal_spacing=horizontal_spacing,
|
1053
|
-
vertical_spacing=vertical_spacing,
|
1445
|
+
horizontal_spacing=horizontal_spacing / n_cols,
|
1446
|
+
vertical_spacing=vertical_spacing / n_rows,
|
1054
1447
|
specs=specs
|
1055
1448
|
)
|
1056
1449
|
|
@@ -1103,8 +1496,44 @@ def pie_subplots(df, col_x, col_y, col_cat, col_color, n_cols=4, horizontal_spac
|
|
1103
1496
|
return fig
|
1104
1497
|
|
1105
1498
|
|
1106
|
-
def horizontal_stacked_bars(df
|
1499
|
+
def horizontal_stacked_bars(df: pd.DataFrame,
|
1500
|
+
col_x: str,
|
1501
|
+
col_y: str,
|
1502
|
+
col_percentage: str,
|
1503
|
+
col_cat: str,
|
1504
|
+
col_color: str,
|
1505
|
+
title_text: str = "Sentiment per topic",
|
1506
|
+
width: int = 1200,
|
1507
|
+
height: int = 1200,
|
1508
|
+
xaxis_tickangle: int = 0,
|
1509
|
+
horizontal_spacing: float = 0,
|
1510
|
+
vertical_spacing: float = 0.08,
|
1511
|
+
plot_bgcolor: str = None,
|
1512
|
+
paper_bgcolor: str = None,
|
1513
|
+
template: str = "plotly") -> go.Figure:
|
1514
|
+
"""
|
1515
|
+
Create horizontal stacked bar plots.
|
1107
1516
|
|
1517
|
+
Args:
|
1518
|
+
df (pd.DataFrame): DataFrame containing data for the bar plots.
|
1519
|
+
col_x (str): Name of the column containing x-axis values.
|
1520
|
+
col_y (str): Name of the column containing y-axis values.
|
1521
|
+
col_percentage (str): Name of the column containing percentage values.
|
1522
|
+
col_cat (str): Name of the column containing categories.
|
1523
|
+
col_color (str): Name of the column containing colors.
|
1524
|
+
title_text (str, optional): Title of the plot. Defaults to "Sentiment per topic".
|
1525
|
+
width (int, optional): Width of the plot. Defaults to 1200.
|
1526
|
+
height (int, optional): Height of the plot. Defaults to 1200.
|
1527
|
+
xaxis_tickangle (int, optional): Angle for x-axis ticks. Defaults to 0.
|
1528
|
+
horizontal_spacing (float, optional): Spacing between subplots horizontally. Defaults to 0.
|
1529
|
+
vertical_spacing (float, optional): Spacing between subplots vertically. Defaults to 0.08.
|
1530
|
+
plot_bgcolor (Optional[str], optional): Background color of the plot. Defaults to None.
|
1531
|
+
paper_bgcolor (Optional[str], optional): Background color of the paper. Defaults to None.
|
1532
|
+
template (str, optional): Plotly template to use. Defaults to "plotly".
|
1533
|
+
|
1534
|
+
Returns:
|
1535
|
+
go.Figure: Plotly Figure object representing the horizontal stacked bar plots.
|
1536
|
+
"""
|
1108
1537
|
categories = df[col_cat].unique()
|
1109
1538
|
|
1110
1539
|
n_cols=2
|
@@ -1112,8 +1541,8 @@ def horizontal_stacked_bars(df, col_x, col_y, col_percentage, col_cat, col_color
|
|
1112
1541
|
rows = 1, # number of rows
|
1113
1542
|
cols = 2, # number of columns
|
1114
1543
|
# subplot_titles = list(categories), # title for each subplot
|
1115
|
-
vertical_spacing = vertical_spacing
|
1116
|
-
horizontal_spacing =
|
1544
|
+
vertical_spacing = vertical_spacing, # space between subplots
|
1545
|
+
horizontal_spacing = horizontal_spacing / n_cols # space between subplots
|
1117
1546
|
)
|
1118
1547
|
|
1119
1548
|
for cat in categories:
|
@@ -1181,8 +1610,44 @@ def horizontal_stacked_bars(df, col_x, col_y, col_percentage, col_cat, col_color
|
|
1181
1610
|
|
1182
1611
|
return fig
|
1183
1612
|
|
1184
|
-
def bar_trend_per_day(df
|
1613
|
+
def bar_trend_per_day(df: pd.DataFrame,
|
1614
|
+
col_date: str,
|
1615
|
+
col_metric1: str,
|
1616
|
+
col_metric2: str,
|
1617
|
+
xaxis_title: str = "Date",
|
1618
|
+
y1_axis_title: str = "Verbatims",
|
1619
|
+
y2_axis_title: str = "Engagements",
|
1620
|
+
title_text: str = "Trend - couverture & résonance",
|
1621
|
+
width: int = 1500,
|
1622
|
+
height: int = 700,
|
1623
|
+
marker_color: str = "indianred",
|
1624
|
+
line_color: str = "#273746",
|
1625
|
+
plot_bgcolor: str = None,
|
1626
|
+
paper_bgcolor: str = None,
|
1627
|
+
template: str = "plotly") -> go.Figure:
|
1628
|
+
"""
|
1629
|
+
Creates a Plotly stacked bar chart with a secondary line plot for two metrics over time.
|
1630
|
+
|
1631
|
+
Parameters:
|
1632
|
+
- df (pd.DataFrame): The DataFrame containing the data.
|
1633
|
+
- col_date (str): The name of the column containing dates.
|
1634
|
+
- col_metric1 (str): The name of the column containing the first metric values.
|
1635
|
+
- col_metric2 (str): The name of the column containing the second metric values.
|
1636
|
+
- xaxis_title (str, optional): The title for the x-axis. Defaults to "Date".
|
1637
|
+
- y1_axis_title (str, optional): The title for the primary y-axis. Defaults to "Verbatims".
|
1638
|
+
- y2_axis_title (str, optional): The title for the secondary y-axis. Defaults to "Engagements".
|
1639
|
+
- title_text (str, optional): The title text for the chart. Defaults to "Trend - couverture & résonance".
|
1640
|
+
- width (int, optional): The width of the chart. Defaults to 1500.
|
1641
|
+
- height (int, optional): The height of the chart. Defaults to 700.
|
1642
|
+
- marker_color (str, optional): The color of the bars. Defaults to "indianred".
|
1643
|
+
- line_color (str, optional): The color of the line plot. Defaults to "#273746".
|
1644
|
+
- plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
1645
|
+
- paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
1646
|
+
- template (str, optional): The template of the chart. Defaults to "plotly".
|
1185
1647
|
|
1648
|
+
Returns:
|
1649
|
+
- fig (go.Figure): The Plotly Figure object representing the stacked bar chart with line plot.
|
1650
|
+
"""
|
1186
1651
|
# Plotly Stacked Bar Chart
|
1187
1652
|
fig = make_subplots(specs=[[{"secondary_y": True}]])
|
1188
1653
|
hovertemplate='<b>Date :</b>'+ df[col_date].astype(str) + '<br><b>'+y1_axis_title+'</b>:'+ df[col_metric1].astype(str)+ '<br><b>'+y2_axis_title+'</b>:'+ df[col_metric2].astype(int).astype(str)
|
@@ -1251,8 +1716,46 @@ def bar_trend_per_day(df, col_date, col_metric1, col_metric2, xaxis_title = "Da
|
|
1251
1716
|
|
1252
1717
|
return fig
|
1253
1718
|
|
1254
|
-
def bar_trend_per_day_per_cat(df
|
1719
|
+
def bar_trend_per_day_per_cat(df: pd.DataFrame,
|
1720
|
+
col_date: str,
|
1721
|
+
col_cat: str,
|
1722
|
+
col_metric1: str,
|
1723
|
+
col_metric2: str,
|
1724
|
+
col_color: str,
|
1725
|
+
xaxis_title: str = "Date",
|
1726
|
+
y1_axis_title: str = "Verbatims",
|
1727
|
+
y2_axis_title: str = "Engagements",
|
1728
|
+
title_text: str = "Trend - couverture & résonance",
|
1729
|
+
vertical_spacing: float = 0.1,
|
1730
|
+
width: int = 1500,
|
1731
|
+
height: int = 700,
|
1732
|
+
plot_bgcolor: str = None,
|
1733
|
+
paper_bgcolor: str = None,
|
1734
|
+
template: str = "plotly") -> go.Figure:
|
1735
|
+
"""
|
1736
|
+
Creates a Plotly stacked bar chart with multiple categories, each represented as a separate subplot.
|
1737
|
+
|
1738
|
+
Parameters:
|
1739
|
+
- df (pd.DataFrame): The DataFrame containing the data.
|
1740
|
+
- col_date (str): The name of the column containing dates.
|
1741
|
+
- col_cat (str): The name of the column containing categories.
|
1742
|
+
- col_metric1 (str): The name of the column containing the first metric values.
|
1743
|
+
- col_metric2 (str): The name of the column containing the second metric values.
|
1744
|
+
- col_color (str): The name of the column containing the color codes for each category.
|
1745
|
+
- xaxis_title (str, optional): The title for the x-axis. Defaults to "Date".
|
1746
|
+
- y1_axis_title (str, optional): The title for the primary y-axis. Defaults to "Verbatims".
|
1747
|
+
- y2_axis_title (str, optional): The title for the secondary y-axis. Defaults to "Engagements".
|
1748
|
+
- title_text (str, optional): The title text for the chart. Defaults to "Trend - couverture & résonance".
|
1749
|
+
- vertical_spacing (float, optional): The space between subplots. Defaults to 0.1.
|
1750
|
+
- width (int, optional): The width of the chart. Defaults to 1500.
|
1751
|
+
- height (int, optional): The height of the chart. Defaults to 700.
|
1752
|
+
- plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
1753
|
+
- paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
1754
|
+
- template (str, optional): The template of the chart. Defaults to "plotly".
|
1255
1755
|
|
1756
|
+
Returns:
|
1757
|
+
- fig (go.Figure): The Plotly Figure object representing the stacked bar chart with subplots for each category.
|
1758
|
+
"""
|
1256
1759
|
fig = make_subplots(
|
1257
1760
|
rows = 2, # number of rows
|
1258
1761
|
cols = 1, # number of columns
|
@@ -1333,8 +1836,36 @@ def bar_trend_per_day_per_cat(df, col_date, col_cat, col_metric1, col_metric2, c
|
|
1333
1836
|
|
1334
1837
|
return fig
|
1335
1838
|
|
1336
|
-
def pie(df
|
1337
|
-
|
1839
|
+
def pie(df: pd.DataFrame,
|
1840
|
+
col_x: str,
|
1841
|
+
col_y: str,
|
1842
|
+
col_color: str,
|
1843
|
+
title: str = "Sentiment",
|
1844
|
+
template: str = "plotly",
|
1845
|
+
width: int = 1000,
|
1846
|
+
height: int = 1000,
|
1847
|
+
plot_bgcolor: str = None,
|
1848
|
+
paper_bgcolor: str = None,
|
1849
|
+
showlegend: bool = True) -> go.Figure:
|
1850
|
+
"""
|
1851
|
+
Creates a Plotly pie chart.
|
1852
|
+
|
1853
|
+
Parameters:
|
1854
|
+
- df (pd.DataFrame): The DataFrame containing the data.
|
1855
|
+
- col_x (str): The name of the column containing the labels for the pie chart slices.
|
1856
|
+
- col_y (str): The name of the column containing the values for the pie chart slices.
|
1857
|
+
- col_color (str): The name of the column containing the colors for the pie chart slices.
|
1858
|
+
- title (str, optional): The title for the pie chart. Defaults to "Sentiment".
|
1859
|
+
- template (str, optional): The template of the chart. Defaults to "plotly".
|
1860
|
+
- width (int, optional): The width of the chart. Defaults to 1000.
|
1861
|
+
- height (int, optional): The height of the chart. Defaults to 1000.
|
1862
|
+
- plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
1863
|
+
- paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
1864
|
+
- showlegend (bool, optional): Whether to show the legend. Defaults to True.
|
1865
|
+
|
1866
|
+
Returns:
|
1867
|
+
- fig (go.Figure): The Plotly Figure object representing the pie chart.
|
1868
|
+
"""
|
1338
1869
|
fig = go.Figure()
|
1339
1870
|
fig.add_trace(go.Pie(
|
1340
1871
|
labels=df[col_x],
|
@@ -1373,8 +1904,40 @@ def pie(df, col_x, col_y, col_color, title = "Sentiment", template = "plotly",
|
|
1373
1904
|
)
|
1374
1905
|
return fig
|
1375
1906
|
|
1376
|
-
def bar(df
|
1907
|
+
def bar(df: pd.DataFrame,
|
1908
|
+
x: str,
|
1909
|
+
y: str,
|
1910
|
+
color: str = "indianred",
|
1911
|
+
xaxis_title: str = "x",
|
1912
|
+
yaxis_title: str = "y",
|
1913
|
+
width: int = 1200,
|
1914
|
+
height: int = 700,
|
1915
|
+
title_text: str = "",
|
1916
|
+
plot_bgcolor: str = None,
|
1917
|
+
paper_bgcolor: str = None,
|
1918
|
+
template: str = "plotly",
|
1919
|
+
showlegend: bool = True) -> go.Figure:
|
1920
|
+
"""
|
1921
|
+
Creates a Plotly vertical bar chart.
|
1922
|
+
|
1923
|
+
Parameters:
|
1924
|
+
- df (pd.DataFrame): The DataFrame containing the data.
|
1925
|
+
- x (str): The name of the column containing the x-axis values.
|
1926
|
+
- y (str): The name of the column containing the y-axis values.
|
1927
|
+
- color (str, optional): The color of the bars. Defaults to "indianred".
|
1928
|
+
- xaxis_title (str, optional): The title for the x-axis. Defaults to "x".
|
1929
|
+
- yaxis_title (str, optional): The title for the y-axis. Defaults to "y".
|
1930
|
+
- width (int, optional): The width of the chart. Defaults to 1200.
|
1931
|
+
- height (int, optional): The height of the chart. Defaults to 700.
|
1932
|
+
- title_text (str, optional): The title text for the chart. Defaults to "".
|
1933
|
+
- plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
1934
|
+
- paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
1935
|
+
- template (str, optional): The template of the chart. Defaults to "plotly".
|
1936
|
+
- showlegend (bool, optional): Whether to show the legend. Defaults to True.
|
1377
1937
|
|
1938
|
+
Returns:
|
1939
|
+
- fig (go.Figure): The Plotly Figure object representing the vertical bar chart.
|
1940
|
+
"""
|
1378
1941
|
fig = go.Figure()
|
1379
1942
|
fig.add_trace(
|
1380
1943
|
go.Bar(
|
@@ -1411,7 +1974,28 @@ def bar(df, x, y, color="indianred", xaxis_title="x", yaxis_title="y", width=120
|
|
1411
1974
|
return fig
|
1412
1975
|
|
1413
1976
|
|
1414
|
-
def add_horizontal_line(fig
|
1977
|
+
def add_horizontal_line(fig: go.Figure,
|
1978
|
+
y: float,
|
1979
|
+
line_color: str = "gray",
|
1980
|
+
line_width: float = 1.5,
|
1981
|
+
line_dash: str = "dash",
|
1982
|
+
annotation_text: str = "Longueur moyenne des textes",
|
1983
|
+
annotation_position: str = "top right") -> go.Figure:
|
1984
|
+
"""
|
1985
|
+
Adds a horizontal line to a Plotly Figure object.
|
1986
|
+
|
1987
|
+
Parameters:
|
1988
|
+
- fig (go.Figure): The Plotly Figure object to which the horizontal line will be added.
|
1989
|
+
- y (float): The y-coordinate of the horizontal line.
|
1990
|
+
- line_color (str, optional): The color of the horizontal line. Defaults to "gray".
|
1991
|
+
- line_width (float, optional): The width of the horizontal line. Defaults to 1.5.
|
1992
|
+
- line_dash (str, optional): The dash style of the horizontal line. Defaults to "dash".
|
1993
|
+
- annotation_text (str, optional): The text annotation associated with the horizontal line. Defaults to "Longueur moyenne des textes".
|
1994
|
+
- annotation_position (str, optional): The position of the annotation relative to the horizontal line. Defaults to "top right".
|
1995
|
+
|
1996
|
+
Returns:
|
1997
|
+
- fig (go.Figure): The Plotly Figure object with the horizontal line added.
|
1998
|
+
"""
|
1415
1999
|
fig.add_hline(
|
1416
2000
|
y=y,
|
1417
2001
|
line_width=line_width,
|
@@ -1422,7 +2006,28 @@ def add_horizontal_line(fig, y, line_color = "gray", line_width = 1.5, line_dash
|
|
1422
2006
|
)
|
1423
2007
|
return fig
|
1424
2008
|
|
1425
|
-
def add_vertical_line(fig
|
2009
|
+
def add_vertical_line(fig: go.Figure,
|
2010
|
+
x: float,
|
2011
|
+
line_color: str = "gray",
|
2012
|
+
line_width: float = 1.5,
|
2013
|
+
line_dash: str = "dash",
|
2014
|
+
annotation_text: str = "Longueur moyenne des textes",
|
2015
|
+
annotation_position: str = "top right") -> go.Figure:
|
2016
|
+
"""
|
2017
|
+
Adds a vertical line to a Plotly Figure object.
|
2018
|
+
|
2019
|
+
Parameters:
|
2020
|
+
- fig (go.Figure): The Plotly Figure object to which the vertical line will be added.
|
2021
|
+
- x (float): The x-coordinate of the vertical line.
|
2022
|
+
- line_color (str, optional): The color of the vertical line. Defaults to "gray".
|
2023
|
+
- line_width (float, optional): The width of the vertical line. Defaults to 1.5.
|
2024
|
+
- line_dash (str, optional): The dash style of the vertical line. Defaults to "dash".
|
2025
|
+
- annotation_text (str, optional): The text annotation associated with the vertical line. Defaults to "Longueur moyenne des textes".
|
2026
|
+
- annotation_position (str, optional): The position of the annotation relative to the vertical line. Defaults to "top right".
|
2027
|
+
|
2028
|
+
Returns:
|
2029
|
+
- fig (go.Figure): The Plotly Figure object with the vertical line added.
|
2030
|
+
"""
|
1426
2031
|
fig.add_vline(
|
1427
2032
|
x=x,
|
1428
2033
|
line_width=line_width,
|
@@ -1433,9 +2038,50 @@ def add_vertical_line(fig, x, line_color = "gray", line_width = 1.5, line_dash =
|
|
1433
2038
|
)
|
1434
2039
|
return fig
|
1435
2040
|
|
1436
|
-
def network_graph(T
|
1437
|
-
|
1438
|
-
|
2041
|
+
def network_graph(T: nx.Graph,
|
2042
|
+
col_size: str = "scaled_size",
|
2043
|
+
col_color: str = "modularity_color",
|
2044
|
+
title_text: str = "Analyse de similitudes",
|
2045
|
+
sample_nodes: float = 0.15,
|
2046
|
+
show_edges: bool = True,
|
2047
|
+
show_halo: bool = False,
|
2048
|
+
textposition: str = None,
|
2049
|
+
line_color: str = "#B7B7B7",
|
2050
|
+
line_dash: str = "dot",
|
2051
|
+
edge_mode: str = "lines+markers",
|
2052
|
+
node_mode: str = "markers+text",
|
2053
|
+
opacity: float = 0.2,
|
2054
|
+
width: int = 1600,
|
2055
|
+
height: int = 1200,
|
2056
|
+
plot_bgcolor: str = None,
|
2057
|
+
paper_bgcolor: str = None,
|
2058
|
+
template: str = "plotly") -> go.Figure:
|
2059
|
+
"""
|
2060
|
+
Creates a network graph visualization using Plotly.
|
2061
|
+
|
2062
|
+
Parameters:
|
2063
|
+
- T (nx.Graph): The NetworkX graph object.
|
2064
|
+
- col_size (str, optional): The column name for node size. Defaults to "scaled_size".
|
2065
|
+
- col_color (str, optional): The column name for node color. Defaults to "modularity_color".
|
2066
|
+
- title_text (str, optional): The title for the graph. Defaults to "Analyse de similitudes".
|
2067
|
+
- sample_nodes (float, optional): The proportion of nodes to sample for displaying labels. Defaults to 0.15.
|
2068
|
+
- show_edges (bool, optional): Whether to display edges. Defaults to True.
|
2069
|
+
- show_halo (bool, optional): Whether to display halo around nodes. Defaults to False.
|
2070
|
+
- textposition (str, optional): The position of node labels. Defaults to None.
|
2071
|
+
- line_color (str, optional): The color of edges. Defaults to "#B7B7B7".
|
2072
|
+
- line_dash (str, optional): The dash style of edges. Defaults to "dot".
|
2073
|
+
- edge_mode (str, optional): The mode for displaying edges. Defaults to "lines+markers".
|
2074
|
+
- node_mode (str, optional): The mode for displaying nodes. Defaults to "markers+text".
|
2075
|
+
- opacity (float, optional): The opacity of nodes. Defaults to 0.2.
|
2076
|
+
- width (int, optional): The width of the plot. Defaults to 1600.
|
2077
|
+
- height (int, optional): The height of the plot. Defaults to 1200.
|
2078
|
+
- plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
|
2079
|
+
- paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
|
2080
|
+
- template (str, optional): The template of the plot. Defaults to "plotly".
|
2081
|
+
|
2082
|
+
Returns:
|
2083
|
+
- fig (go.Figure): The Plotly Figure object representing the network graph visualization.
|
2084
|
+
"""
|
1439
2085
|
# build a DataFrame of nodes from the graph data, for simplicity
|
1440
2086
|
df_nodes=pd.DataFrame()
|
1441
2087
|
for node in T.nodes(data=True):
|
@@ -1548,7 +2194,24 @@ def network_graph(T, col_size="scaled_size", col_color="modularity_color", titl
|
|
1548
2194
|
|
1549
2195
|
return fig
|
1550
2196
|
|
1551
|
-
def richesse_lexicale(df
|
2197
|
+
def richesse_lexicale(df: pd.DataFrame,
|
2198
|
+
title: str = "Richesse lexicale",
|
2199
|
+
width: int = 1200,
|
2200
|
+
height: int = 1000,
|
2201
|
+
template: str = "plotly") -> go.Figure:
|
2202
|
+
"""
|
2203
|
+
Creates a lexical richness visualization using Plotly.
|
2204
|
+
|
2205
|
+
Parameters:
|
2206
|
+
- df (pd.DataFrame): The DataFrame containing word frequency data.
|
2207
|
+
- title (str, optional): The title for the plot. Defaults to "Richesse lexicale".
|
2208
|
+
- width (int, optional): The width of the plot. Defaults to 1200.
|
2209
|
+
- height (int, optional): The height of the plot. Defaults to 1000.
|
2210
|
+
- template (str, optional): The template of the plot. Defaults to "plotly".
|
2211
|
+
|
2212
|
+
Returns:
|
2213
|
+
- fig_richesse (go.Figure): The Plotly Figure object representing the lexical richness visualization.
|
2214
|
+
"""
|
1552
2215
|
df = create_frequency_table(df, "freq")
|
1553
2216
|
fig_richesse = go.Figure()
|
1554
2217
|
fig_richesse.add_trace(
|
@@ -1569,7 +2232,26 @@ def richesse_lexicale(df, title= "Richesse lexicale", width=1200, height=1000, t
|
|
1569
2232
|
fig_richesse.update_yaxes(tickformat=".0f", title_text="Freq", type="log")
|
1570
2233
|
return fig_richesse
|
1571
2234
|
|
1572
|
-
def richesse_lexicale_per_topic(df
|
2235
|
+
def richesse_lexicale_per_topic(df: pd.DataFrame,
|
2236
|
+
col_topic: str,
|
2237
|
+
title: str = "Richesse lexicale par topic",
|
2238
|
+
width: int = 1200,
|
2239
|
+
height: int = 1000,
|
2240
|
+
template: str = "plotly") -> go.Figure:
|
2241
|
+
"""
|
2242
|
+
Creates a lexical richness visualization per topic using Plotly.
|
2243
|
+
|
2244
|
+
Parameters:
|
2245
|
+
- df (pd.DataFrame): The DataFrame containing word frequency data.
|
2246
|
+
- col_topic (str): The name of the column representing topics.
|
2247
|
+
- title (str, optional): The title for the plot. Defaults to "Richesse lexicale par topic".
|
2248
|
+
- width (int, optional): The width of the plot. Defaults to 1200.
|
2249
|
+
- height (int, optional): The height of the plot. Defaults to 1000.
|
2250
|
+
- template (str, optional): The template of the plot. Defaults to "plotly".
|
2251
|
+
|
2252
|
+
Returns:
|
2253
|
+
- fig_richesse (go.Figure): The Plotly Figure object representing the lexical richness visualization per topic.
|
2254
|
+
"""
|
1573
2255
|
fig_richesse = go.Figure()
|
1574
2256
|
for topic in list(df[col_topic].unique()):
|
1575
2257
|
df_tmp = create_frequency_table(df[df[col_topic]==topic], "freq")
|
@@ -1591,8 +2273,42 @@ def richesse_lexicale_per_topic(df, col_topic, title= "Richesse lexicale par top
|
|
1591
2273
|
fig_richesse.update_yaxes(tickformat=".0f", title_text="Freq", type="log")
|
1592
2274
|
return fig_richesse
|
1593
2275
|
|
1594
|
-
def subplots_bar_per_day_per_cat(df
|
2276
|
+
def subplots_bar_per_day_per_cat(df: pd.DataFrame,
|
2277
|
+
col_date: str,
|
2278
|
+
col_cat: str,
|
2279
|
+
metrics: list,
|
2280
|
+
col_color: str,
|
2281
|
+
y_axis_titles: list,
|
2282
|
+
xaxis_title: str = "Date",
|
2283
|
+
title_text: str = "Trend - couverture & résonance",
|
2284
|
+
vertical_spacing: float = 0.1,
|
2285
|
+
width: int = 1500,
|
2286
|
+
height: int = 700,
|
2287
|
+
plot_bgcolor: str = None,
|
2288
|
+
paper_bgcolor: str = None,
|
2289
|
+
template: str = "plotly") -> go.Figure:
|
2290
|
+
"""
|
2291
|
+
Creates subplots of stacked bar charts per day and category using Plotly.
|
2292
|
+
|
2293
|
+
Parameters:
|
2294
|
+
- df (pd.DataFrame): The DataFrame containing the data.
|
2295
|
+
- col_date (str): The name of the column representing dates.
|
2296
|
+
- col_cat (str): The name of the column representing categories.
|
2297
|
+
- metrics (List[str]): A list of column names representing metrics to be plotted.
|
2298
|
+
- col_color (str): The name of the column representing colors for bars.
|
2299
|
+
- y_axis_titles (List[str]): A list of titles for the y-axes of subplots.
|
2300
|
+
- xaxis_title (str, optional): The title for the x-axis. Defaults to "Date".
|
2301
|
+
- title_text (str, optional): The title for the entire plot. Defaults to "Trend - couverture & résonance".
|
2302
|
+
- vertical_spacing (float, optional): The space between subplots. Defaults to 0.1.
|
2303
|
+
- width (int, optional): The width of the entire plot. Defaults to 1500.
|
2304
|
+
- height (int, optional): The height of each subplot. Defaults to 700.
|
2305
|
+
- plot_bgcolor (str, optional): The background color for the plot area. Defaults to None.
|
2306
|
+
- paper_bgcolor (str, optional): The background color for the paper area. Defaults to None.
|
2307
|
+
- template (str, optional): The template of the plot. Defaults to "plotly".
|
1595
2308
|
|
2309
|
+
Returns:
|
2310
|
+
- fig (go.Figure): The Plotly Figure object representing the subplots of stacked bar charts.
|
2311
|
+
"""
|
1596
2312
|
fig = make_subplots(
|
1597
2313
|
rows = len(metrics), # number of rows
|
1598
2314
|
cols = 1, # number of columns
|
@@ -1668,7 +2384,38 @@ def subplots_bar_per_day_per_cat(df, col_date, col_cat, metrics, col_color, y_ax
|
|
1668
2384
|
return fig
|
1669
2385
|
|
1670
2386
|
|
1671
|
-
def add_shape(fig
|
2387
|
+
def add_shape(fig: go.Figure,
|
2388
|
+
shape_type: str = "rect",
|
2389
|
+
x0: float = -1,
|
2390
|
+
y0: float = -1,
|
2391
|
+
x1: float = 0,
|
2392
|
+
y1: float = 0,
|
2393
|
+
fillcolor: str = 'Silver',
|
2394
|
+
opacity: float = 0.1,
|
2395
|
+
line_width: float = 0,
|
2396
|
+
line_color: str = 'white',
|
2397
|
+
dash: str = None,
|
2398
|
+
layer: str = "below") -> go.Figure:
|
2399
|
+
"""
|
2400
|
+
Adds a shape to a Plotly figure.
|
2401
|
+
|
2402
|
+
Parameters:
|
2403
|
+
- fig (go.Figure): The Plotly Figure object.
|
2404
|
+
- shape_type (str, optional): The type of shape to add. Defaults to "rect".
|
2405
|
+
- x0 (float, optional): The x-coordinate of the lower left corner of the shape. Defaults to -1.
|
2406
|
+
- y0 (float, optional): The y-coordinate of the lower left corner of the shape. Defaults to -1.
|
2407
|
+
- x1 (float, optional): The x-coordinate of the upper right corner of the shape. Defaults to 0.
|
2408
|
+
- y1 (float, optional): The y-coordinate of the upper right corner of the shape. Defaults to 0.
|
2409
|
+
- fillcolor (str, optional): The fill color of the shape. Defaults to 'Silver'.
|
2410
|
+
- opacity (float, optional): The opacity of the shape. Defaults to 0.1.
|
2411
|
+
- line_width (float, optional): The width of the shape's outline. Defaults to 0.
|
2412
|
+
- line_color (str, optional): The color of the shape's outline. Defaults to 'white'.
|
2413
|
+
- dash (str, optional): The dash style of the shape's outline. Defaults to None.
|
2414
|
+
- layer (str, optional): The layer on which the shape is added, either 'below' or 'above' the data. Defaults to "below".
|
2415
|
+
|
2416
|
+
Returns:
|
2417
|
+
- fig (go.Figure): The modified Plotly Figure object with the added shape.
|
2418
|
+
"""
|
1672
2419
|
fig.add_shape(
|
1673
2420
|
# Shape for the area between (-1, 0)
|
1674
2421
|
{
|
@@ -1688,4 +2435,46 @@ def add_shape(fig, shape_type = "rect", x0= -1, y0= -1, x1 = 0, y1=0, fillcolor=
|
|
1688
2435
|
|
1689
2436
|
}
|
1690
2437
|
)
|
2438
|
+
return fig
|
2439
|
+
|
2440
|
+
def add_image(fig: go.Figure,
|
2441
|
+
xref: str = "paper",
|
2442
|
+
yref: str = "paper",
|
2443
|
+
x: float = 0,
|
2444
|
+
y: float = 0,
|
2445
|
+
sizex: float = 0.08,
|
2446
|
+
sizey: float = 0.08,
|
2447
|
+
xanchor: str = "right",
|
2448
|
+
yanchor: str = "bottom",
|
2449
|
+
source: str = "") -> go.Figure:
|
2450
|
+
"""
|
2451
|
+
Adds an image to a Plotly figure.
|
2452
|
+
|
2453
|
+
Parameters:
|
2454
|
+
- fig (go.Figure): The Plotly Figure object.
|
2455
|
+
- xref (str, optional): The x-coordinate reference point. Defaults to "paper".
|
2456
|
+
- yref (str, optional): The y-coordinate reference point. Defaults to "paper".
|
2457
|
+
- x (float, optional): The x-coordinate of the image position. Defaults to 0.
|
2458
|
+
- y (float, optional): The y-coordinate of the image position. Defaults to 0.
|
2459
|
+
- sizex (float, optional): The size of the image in the x-direction. Defaults to 0.08.
|
2460
|
+
- sizey (float, optional): The size of the image in the y-direction. Defaults to 0.08.
|
2461
|
+
- xanchor (str, optional): The x-coordinate anchor point. Defaults to "right".
|
2462
|
+
- yanchor (str, optional): The y-coordinate anchor point. Defaults to "bottom".
|
2463
|
+
- source (str, optional): The URL source of the image. Defaults to "" (empty string).
|
2464
|
+
|
2465
|
+
Returns:
|
2466
|
+
- fig (go.Figure): The modified Plotly Figure object with the added image.
|
2467
|
+
"""
|
2468
|
+
fig.add_layout_image(
|
2469
|
+
dict(
|
2470
|
+
source=source,
|
2471
|
+
xref=xref,
|
2472
|
+
yref=yref,
|
2473
|
+
x=x, y=y,
|
2474
|
+
sizex=sizex,
|
2475
|
+
sizey=sizey,
|
2476
|
+
xanchor=xanchor,
|
2477
|
+
yanchor=yanchor
|
2478
|
+
)
|
2479
|
+
)
|
1691
2480
|
return fig
|