opsci-toolbox 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,55 +15,115 @@ import math
15
15
  import pandas as pd
16
16
  from opsci_toolbox.helpers.nlp import sample_most_engaging_posts, create_frequency_table
17
17
  from matplotlib.colors import to_hex
18
+ import networkx as nx
18
19
 
19
20
 
20
21
 
21
- def upload_chart_studio(username,api_key,fig, title):
22
+ def upload_chart_studio(
23
+ username: str,
24
+ api_key: str,
25
+ fig,
26
+ title: str
27
+ ) -> tuple:
22
28
  """
23
- Upload Plotly viz to chart studio
29
+ Upload a Plotly visualization to Chart Studio.
30
+
31
+ Args:
32
+ username (str): The Chart Studio username.
33
+ api_key (str): The Chart Studio API key.
34
+ fig: The Plotly figure object to be uploaded.
35
+ title (str): The title for the uploaded visualization.
36
+
37
+ Returns:
38
+ tuple: A tuple containing the URL of the uploaded visualization and the embed code.
24
39
  """
25
40
  URL = ""
26
41
  EMBED = ""
27
42
 
28
- try:
43
+ try:
44
+ # Set Chart Studio credentials
29
45
  tls.set_credentials_file(username=username, api_key=api_key)
30
- URL = py.plot(fig, filename = title, auto_open=True)
46
+
47
+ # Upload the figure to Chart Studio
48
+ URL = py.plot(fig, filename=title, auto_open=True)
49
+
50
+ # Get the embed code for the uploaded figure
31
51
  EMBED = tls.get_embed(URL)
32
- print("* URL DE LA VIZ >> ",URL)
33
- print("\n*CODE EMBED A COLLER \n",EMBED)
52
+
53
+ # Print the URL and embed code
54
+ print("* URL DE LA VIZ >> ", URL)
55
+ print("\n*CODE EMBED A COLLER \n", EMBED)
34
56
 
35
57
  except Exception as e:
36
- pass
58
+ # Print the exception message and a suggestion to reduce the visualization size
37
59
  print(e, "try to reduce the dataviz size by printing less data")
38
60
 
39
- return URL,EMBED
61
+ return URL, EMBED
40
62
 
63
+ def scale_to_0_10(x: pd.Series) -> pd.Series:
64
+ """
65
+ Scale a pandas Series to the range [0, 10].
66
+
67
+ Args:
68
+ x (pd.Series): The input pandas Series to be scaled.
41
69
 
42
- def scale_to_0_10(x):
70
+ Returns:
71
+ pd.Series: The scaled pandas Series with values in the range [0, 10].
72
+ """
43
73
  return ((x - x.min()) / (x.max() - x.min()) * 10).astype(int)
44
74
 
45
- def normalize_data_size(df, col:str, coef = 20, constant = 5):
75
+ def normalize_data_size(df: pd.DataFrame, col: str, coef: int = 20, constant: int = 5) -> pd.DataFrame:
46
76
  """
47
- Function to normalize the sizes of dots
77
+ Normalize the sizes of dots based on a specified column in a DataFrame.
78
+
79
+ Args:
80
+ df (pd.DataFrame): The input DataFrame.
81
+ col (str): The column name to be normalized.
82
+ coef (int, optional): The coefficient to scale the normalized values. Defaults to 20.
83
+ constant (int, optional): The constant to add to the scaled normalized values. Defaults to 5.
84
+
85
+ Returns:
86
+ pd.DataFrame: The DataFrame with an additional column for the normalized sizes.
48
87
  """
49
- df['normalized_'+col]=((df[col]-df[col].max())/(df[col]+df[col].max())+1) * coef + constant
88
+ df['normalized_' + col] = ((df[col] - df[col].max()) / (df[col] + df[col].max()) + 1) * coef + constant
50
89
  return df
51
90
 
52
- def generate_color_palette(lst, transparency=1):
91
+ def generate_color_palette(lst: list, transparency: float = 1) -> dict:
53
92
  """
54
- Function to generate a random color palette of RGBa codes
93
+ Generate a random color palette of RGBA codes.
94
+
95
+ Args:
96
+ lst (List[str]): List of color names or identifiers.
97
+ transparency (float, optional): Transparency value for RGBA colors (0 to 1). Defaults to 1.
98
+
99
+ Returns:
100
+ dict: Dictionary containing color names or identifiers as keys and corresponding RGBA codes as values.
55
101
  """
56
- color_palette = {color: 'rgba({}, {}, {}, {})'.format(
57
- random.randrange(0, 255),
58
- random.randrange(0, 255),
59
- random.randrange(0, 255),
60
- transparency
61
- ) for color in lst}
102
+ color_palette = {
103
+ color: 'rgba({}, {}, {}, {})'.format(
104
+ random.randrange(0, 255),
105
+ random.randrange(0, 255),
106
+ random.randrange(0, 255),
107
+ transparency
108
+ )
109
+ for color in lst
110
+ }
62
111
  return color_palette
63
112
 
64
- def generate_color_palette_with_colormap(lst, colormap = "viridis"):
113
+ def generate_color_palette_with_colormap(lst: list, colormap: str = "viridis") -> dict:
114
+ """
115
+ Generate a color palette with hexadecimal codes using a specified colormap.
116
+
117
+ Args:
118
+ lst (List[str]): List of color names or identifiers.
119
+ colormap (str, optional): Name of the colormap to use. Defaults to "viridis".
120
+
121
+ Returns:
122
+ Dict[str, str]: Dictionary containing color names or identifiers as keys and corresponding hexadecimal codes as values.
123
+ """
65
124
  num_colors = len(lst)
66
- # Generate some example data
125
+
126
+ # Generate example data
67
127
  data = np.linspace(0, 1, num_colors)
68
128
 
69
129
  # Choose the colormap
@@ -76,38 +136,69 @@ def generate_color_palette_with_colormap(lst, colormap = "viridis"):
76
136
  colors = cmap(norm(data))
77
137
 
78
138
  # Convert colors to hexadecimal codes
79
- hex_colors = {item : to_hex(colors[i]) for i, item in enumerate(lst)}
139
+ hex_colors = {item: to_hex(colors[i]) for i, item in enumerate(lst)}
80
140
 
81
141
  return hex_colors
82
142
 
83
- def generate_hexadecimal_color_palette(lst, add_transparency=False, transparency=0.5):
143
+ def generate_hexadecimal_color_palette(lst: list, add_transparency: bool = False, transparency: float = 0.5) -> dict:
84
144
  """
85
- Function to generate a random color palette with hexadecimal codes and transparency
145
+ Generate a random color palette with hexadecimal codes and optional transparency.
146
+
147
+ Args:
148
+ lst (List[str]): List of color names or identifiers.
149
+ add_transparency (bool, optional): Whether to add transparency to the colors. Defaults to False.
150
+ transparency (float, optional): Transparency value for the colors (0 to 1). Defaults to 0.5.
151
+
152
+ Returns:
153
+ Dict[str, str]: Dictionary containing color names or identifiers as keys and corresponding hexadecimal codes as values.
86
154
  """
87
155
  if add_transparency:
88
156
  alpha_hex = int(transparency * 255) # Convert transparency to integer (0-255 range)
89
- color_palette = {color: "#{:02x}{:02x}{:02x}{:02x}".format(
90
- random.randint(0, 255),
91
- random.randint(0, 255),
92
- random.randint(0, 255),
93
- alpha_hex
94
- ) for color in lst}
157
+ color_palette = {
158
+ color: "#{:02x}{:02x}{:02x}{:02x}".format(
159
+ random.randint(0, 255),
160
+ random.randint(0, 255),
161
+ random.randint(0, 255),
162
+ alpha_hex
163
+ )
164
+ for color in lst
165
+ }
95
166
  else:
96
- color_palette = {color: "#{:02x}{:02x}{:02x}".format(
97
- random.randint(0, 255),
98
- random.randint(0, 255),
99
- random.randint(0, 255)
100
- ) for color in lst}
167
+ color_palette = {
168
+ color: "#{:02x}{:02x}{:02x}".format(
169
+ random.randint(0, 255),
170
+ random.randint(0, 255),
171
+ random.randint(0, 255)
172
+ )
173
+ for color in lst
174
+ }
101
175
  return color_palette
102
176
 
103
- def generate_random_hexadecimal_color():
104
- return "#{:02x}{:02x}{:02x}".format(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
177
+ def generate_random_hexadecimal_color() -> str:
178
+ """
179
+ Generate a random hexadecimal color code.
180
+
181
+ Returns:
182
+ str: Hexadecimal color code.
183
+ """
184
+ return "#{:02x}{:02x}{:02x}".format(
185
+ random.randint(0, 255),
186
+ random.randint(0, 255),
187
+ random.randint(0, 255)
188
+ )
105
189
 
106
- def wrap_text(txt, lenght=50):
190
+ def wrap_text(txt: str, length: int = 50) -> str:
107
191
  """
108
- Function to wrap text (for hover)
192
+ Wrap text to a specified length.
193
+
194
+ Args:
195
+ txt (str): The text to wrap.
196
+ length (int, optional): The maximum length of each line. Defaults to 50.
197
+
198
+ Returns:
199
+ str: The wrapped text.
109
200
  """
110
- txt = '<br>'.join(textwrap.wrap(str(txt), width=lenght))
201
+ txt = '<br>'.join(textwrap.wrap(str(txt), width=length))
111
202
  return txt
112
203
 
113
204
  def get_convex_hull_coord(points: np.array, interpolate_curve: bool = True) -> tuple:
@@ -282,31 +373,45 @@ def get_convex_hull_coord(points: np.array, interpolate_curve: bool = True) -> t
282
373
 
283
374
  # return fig
284
375
 
285
- def create_scatter_plot(df, col_x, col_y, col_category, color_palette, col_color, col_size, col_text, col_legend = [], title="Scatter Plot", x_axis_label="X-axis", y_axis_label="Y-axis", width=1000, height=1000, xaxis_range=None, yaxis_range=None,
286
- size_value =4, opacity=0.8, maxdisplayed=0, mode = "markers", textposition="bottom center", plot_bgcolor=None, paper_bgcolor=None, yaxis_showgrid = False, xaxis_showgrid = False, color="indianred", line_width=0.5, line_color="white", colorscale='Viridis', showscale=True, template="plotly"):
376
+ def create_scatter_plot(df: pd.DataFrame, col_x: str, col_y: str, col_category: str, color_palette: dict, col_color: str, col_size: str, col_text: str, col_legend: list = [], title: str = "Scatter Plot", x_axis_label: str = "X-axis", y_axis_label: str = "Y-axis", width: int = 1000, height: int = 1000, xaxis_range: list =None, yaxis_range: list =None, size_value: int = 4, opacity: float = 0.8, maxdisplayed: int = 0, mode: str = "markers", textposition: str = "bottom center", plot_bgcolor: str = None, paper_bgcolor: str = None, yaxis_showgrid: bool = False, xaxis_showgrid: bool = False, color: str = "indianred", line_width: float = 0.5, line_color: str = "white", colorscale: str = 'Viridis', showscale: bool = True, template: str = "plotly") -> go.Figure:
287
377
  """
288
- Create a scatter plot :
289
- - df contains all data : X / Y values, category for colorization, sizes and text for hover.
290
- - col_x : name of the column containing X values
291
- - col_y : name of the column containing Y values
292
- - col_category : name of the column for colorization
293
- - color_palette : a dict mapping category with color value
294
- - col_color : name of the column for color ==> to be used only for continuous scale
295
- - col_size : name of the column for dot sizes
296
- - col_text : name of the column containing text for legend on hover
297
- - title : graph title
298
- - x_axis_label : label for X
299
- - y_axis_label : label for Y
300
- - width / height : size of the graphe
301
- - xaxis_range / y_axis_range : range values for axis. None for auto values.
302
- - size_value = minimun size (or constant) for dots
303
- - opacity : dots transparency
304
- - maxdisplayed : maximum number of dots to display. 0 = infinite
305
- - plot_bgcolor : background color for plot
306
- - paper_bgcolor : background color for the area around the plot
307
- - color : color code for dots if col_category is None
308
- - line_width : width of dots contours
309
- - line_color : color of dots contours
378
+ Create a scatter plot.
379
+
380
+ Args:
381
+ df (pd.DataFrame): DataFrame containing all data.
382
+ col_x (str): Name of the column containing X values.
383
+ col_y (str): Name of the column containing Y values.
384
+ col_category (str): Name of the column for colorization.
385
+ color_palette (dict): A dictionary mapping category with color value.
386
+ col_color (str): Name of the column for color. Only used for continuous scale.
387
+ col_size (str): Name of the column for dot sizes.
388
+ col_text (str): Name of the column containing text for legend on hover.
389
+ col_legend (List[str], optional): List of column names for legend. Defaults to [].
390
+ title (str, optional): Graph title. Defaults to "Scatter Plot".
391
+ x_axis_label (str, optional): Label for X-axis. Defaults to "X-axis".
392
+ y_axis_label (str, optional): Label for Y-axis. Defaults to "Y-axis".
393
+ width (int, optional): Size of the graph. Defaults to 1000.
394
+ height (int, optional): Size of the graph. Defaults to 1000.
395
+ xaxis_range (list, optional): Range values for X-axis. Defaults to None.
396
+ yaxis_range (list, optional): Range values for Y-axis. Defaults to None.
397
+ size_value (int, optional): Minimum size (or constant) for dots. Defaults to 4.
398
+ opacity (float, optional): Dots transparency. Defaults to 0.8.
399
+ maxdisplayed (int, optional): Maximum number of dots to display. 0 = infinite. Defaults to 0.
400
+ mode (str, optional): Mode for the scatter plot. Defaults to "markers".
401
+ textposition (str, optional): Text position for hover. Defaults to "bottom center".
402
+ plot_bgcolor (str, optional): Background color for plot. Defaults to None.
403
+ paper_bgcolor (str, optional): Background color for the area around the plot. Defaults to None.
404
+ yaxis_showgrid (bool, optional): Whether to show grid on Y-axis. Defaults to False.
405
+ xaxis_showgrid (bool, optional): Whether to show grid on X-axis. Defaults to False.
406
+ color (str, optional): Color code for dots if col_category is None. Defaults to "indianred".
407
+ line_width (float, optional): Width of dots contours. Defaults to 0.5.
408
+ line_color (str, optional): Color of dots contours. Defaults to "white".
409
+ colorscale (str, optional): Color scale for continuous color mapping. Defaults to 'Viridis'.
410
+ showscale (bool, optional): Whether to show color scale. Defaults to True.
411
+ template (str, optional): Plotly template. Defaults to "plotly".
412
+
413
+ Returns:
414
+ go.Figure: Plotly scatter plot figure.
310
415
  """
311
416
 
312
417
  if line_color is None :
@@ -422,52 +527,83 @@ def create_scatter_plot(df, col_x, col_y, col_category, color_palette, col_color
422
527
  )
423
528
  return fig
424
529
 
425
- def add_annotations(fig, df, col_x, col_y, col_txt, width=1000, label_size_ratio=100, bordercolor = "#C7C7C7", arrowcolor = "SlateGray", bgcolor ="#FFFFFF", font_color = "SlateGray"):
426
- df[col_txt]=df[col_txt].fillna("").astype(str)
530
+ def add_annotations(fig: go.Figure, df: pd.DataFrame, col_x: str, col_y: str, col_txt: str, width: int = 1000, label_size_ratio: int = 100, bordercolor: str = "#C7C7C7", arrowcolor: str = "SlateGray", bgcolor: str = "#FFFFFF", font_color: str = "SlateGray") -> go.Figure:
531
+ """
532
+ Add annotations to a Plotly figure.
533
+
534
+ Args:
535
+ fig (go.Figure): Plotly figure object.
536
+ df (pd.DataFrame): DataFrame containing annotation data.
537
+ col_x (str): Name of the column containing X values.
538
+ col_y (str): Name of the column containing Y values.
539
+ col_txt (str): Name of the column containing text for annotations.
540
+ width (int, optional): Width of the figure. Defaults to 1000.
541
+ label_size_ratio (int, optional): Ratio of label size to figure width. Defaults to 100.
542
+ bordercolor (str, optional): Color of annotation borders. Defaults to "#C7C7C7".
543
+ arrowcolor (str, optional): Color of annotation arrows. Defaults to "SlateGray".
544
+ bgcolor (str, optional): Background color of annotations. Defaults to "#FFFFFF".
545
+ font_color (str, optional): Color of annotation text. Defaults to "SlateGray".
546
+
547
+ Returns:
548
+ go.Figure: Plotly figure object with annotations added.
549
+ """
550
+ df[col_txt] = df[col_txt].fillna("").astype(str)
551
+
427
552
  for i, row in df.iterrows():
428
- fig.add_annotation(x=row[col_x],
429
- y=row[col_y],
430
- text='<b>'+wrap_text(row[col_txt])+'</b>',
431
- showarrow=True,
432
- arrowhead=1,
433
- font=dict(
434
- family="Helvetica, Sans-serif",
435
- size=width / label_size_ratio,
436
- color=font_color
437
- ),
438
- bordercolor=bordercolor,
439
- borderwidth=width / 1000,
440
- borderpad=width / 500,
441
- bgcolor=bgcolor,
442
- opacity=1,
443
- arrowcolor=arrowcolor
444
- )
553
+ fig.add_annotation(
554
+ x=row[col_x],
555
+ y=row[col_y],
556
+ text='<b>'+wrap_text(row[col_txt])+'</b>',
557
+ showarrow=True,
558
+ arrowhead=1,
559
+ font=dict(
560
+ family="Helvetica, Sans-serif",
561
+ size=width / label_size_ratio,
562
+ color=font_color
563
+ ),
564
+ bordercolor=bordercolor,
565
+ borderwidth=width / 1000,
566
+ borderpad=width / 500,
567
+ bgcolor=bgcolor,
568
+ opacity=1,
569
+ arrowcolor=arrowcolor
570
+ )
445
571
 
446
572
  return fig
447
573
 
448
- def scatter3D(df, col_x, col_y, col_z, col_category, color_palette, col_size, col_text, title="3D Scatter Plot", x_axis_label="X-axis", y_axis_label="Y-axis", z_axis_label="Z-axis", width=1000, height=1000, xaxis_range=None, yaxis_range=None,
449
- zaxis_range=None, size_value =4, opacity=0.8, plot_bgcolor=None, paper_bgcolor=None, color="indianred", line_width=0.5, line_color="white", template = "plotly"):
574
+ def scatter3D(df: pd.DataFrame, col_x: str, col_y: str, col_z: str, col_category: str, color_palette: dict, col_size: str, col_text: str, title: str = "3D Scatter Plot", x_axis_label: str = "X-axis", y_axis_label: str = "Y-axis", z_axis_label: str = "Z-axis", width: int = 1000, height: int = 1000, xaxis_range: list = None, yaxis_range: list = None, zaxis_range: list = None, size_value: int = 4, opacity: float = 0.8, plot_bgcolor: str = None, paper_bgcolor: str = None, color: str = "indianred", line_width: float = 0.5, line_color: str = "white", template: str = "plotly") -> go.Figure:
450
575
  """
451
- Create a 3D scatter plot :
452
- - df contains all data : X / Y values, category for colorization, sizes and text for hover.
453
- - col_x : name of the column containing X values
454
- - col_y : name of the column containing Y values
455
- - col_z : name of the column containing Z values
456
- - col_category : name of the column for colorization
457
- - color_palette : a dict mapping category with color value
458
- - col_size : name of the column for dot sizes
459
- - col_text : name of the column containing text for legend on hover
460
- - title : graph title
461
- - x_axis_label / y_axis_label / z_axis_label : label for X, Y, Z axis
462
- - width / height : size of the graphe
463
- - xaxis_range / y_axis_range / z_axis_range : range values for axis. None for auto values.
464
- - size_value = minimun size (or constant) for dots
465
- - opacity : dots transparency
466
- - plot_bgcolor : background color for plot
467
- - paper_bgcolor : background color for the area around the plot
468
- - color : color code for dots if col_category is None
469
- - line_width : width of dots contours
470
- - line_color : color of dots contours
576
+ Create a 3D scatter plot.
577
+
578
+ Args:
579
+ df (pd.DataFrame): DataFrame containing all data.
580
+ col_x (str): Name of the column containing X values.
581
+ col_y (str): Name of the column containing Y values.
582
+ col_z (str): Name of the column containing Z values.
583
+ col_category (str): Name of the column for colorization.
584
+ color_palette (dict): A dictionary mapping categories with color values.
585
+ col_size (str): Name of the column for dot sizes.
586
+ col_text (str): Name of the column containing text for legend on hover.
587
+ title (str, optional): Graph title. Defaults to "3D Scatter Plot".
588
+ x_axis_label (str, optional): Label for X-axis. Defaults to "X-axis".
589
+ y_axis_label (str, optional): Label for Y-axis. Defaults to "Y-axis".
590
+ z_axis_label (str, optional): Label for Z-axis. Defaults to "Z-axis".
591
+ width (int, optional): Width of the graph. Defaults to 1000.
592
+ height (int, optional): Height of the graph. Defaults to 1000.
593
+ xaxis_range (list, optional): Range values for the X-axis. Defaults to None.
594
+ yaxis_range (list, optional): Range values for the Y-axis. Defaults to None.
595
+ zaxis_range (list, optional): Range values for the Z-axis. Defaults to None.
596
+ size_value (int, optional): Minimum size (or constant) for dots. Defaults to 4.
597
+ opacity (float, optional): Dots transparency. Defaults to 0.8.
598
+ plot_bgcolor (str, optional): Background color for the plot. Defaults to None.
599
+ paper_bgcolor (str, optional): Background color for the area around the plot. Defaults to None.
600
+ color (str, optional): Color code for dots if col_category is None. Defaults to "indianred".
601
+ line_width (float, optional): Width of dots contours. Defaults to 0.5.
602
+ line_color (str, optional): Color of dots contours. Defaults to "white".
603
+ template (str, optional): Plotly template. Defaults to "plotly".
604
+
605
+ Returns:
606
+ go.Figure: Plotly figure object.
471
607
  """
472
608
  fig=go.Figure()
473
609
  if col_category is not None:
@@ -587,19 +723,31 @@ def scatter3D(df, col_x, col_y, col_z, col_category, color_palette, col_size, co
587
723
  return fig
588
724
 
589
725
 
590
- def fig_bar_trend(df, col_x, bar_measure, trend_measure, x_name="X", bar_name ="metric1", trend_name = "metric2", marker_color='lightpink', line_color='indianred', title_text="Couverture & Résonance", width=1500, height=700, xaxis_tickangle=0, opacity=0.8, plot_bgcolor=None, paper_bgcolor=None, template = "plotly"):
726
+ def fig_bar_trend(df: pd.DataFrame, col_x: str, bar_measure: str, trend_measure: str, x_name: str = "X", bar_name: str = "metric1", trend_name: str = "metric2", marker_color: str = 'lightpink', line_color: str = 'indianred', title_text: str = "Couverture & Résonance", width: int = 1500, height: int = 700, xaxis_tickangle: int = 0, opacity: float = 0.8, plot_bgcolor: str = None, paper_bgcolor: str = None, template: str = "plotly") -> go.Figure:
591
727
  """
592
- Display a graph that combine bar and trend chart to compare 2 metrics :
593
- - x = x axis data
594
- - bar_measure = data represented as bar diagram
595
- - trend_measure = data represented as trend line
596
- - x_name / bar_name / trend_name : axis labels
597
- - marker_color = color code for bars
598
- - line_color = color code for trend line
599
- - title_text = graph title
600
- - width / height = size of plot
601
- - xaxis_tickangle = angle for x ticks
602
- - opacity = opacity of bars
728
+ Display a graph that combines bar and trend chart to compare 2 metrics.
729
+
730
+ Args:
731
+ df (pd.DataFrame): DataFrame containing all data.
732
+ col_x (str): Name of the column containing X values.
733
+ bar_measure (str): Data represented as bar diagram.
734
+ trend_measure (str): Data represented as trend line.
735
+ x_name (str, optional): Label for X-axis. Defaults to "X".
736
+ bar_name (str, optional): Label for the bar measure. Defaults to "metric1".
737
+ trend_name (str, optional): Label for the trend measure. Defaults to "metric2".
738
+ marker_color (str, optional): Color code for bars. Defaults to 'lightpink'.
739
+ line_color (str, optional): Color code for trend line. Defaults to 'indianred'.
740
+ title_text (str, optional): Graph title. Defaults to "Couverture & Résonance".
741
+ width (int, optional): Width of the graph. Defaults to 1500.
742
+ height (int, optional): Height of the graph. Defaults to 700.
743
+ xaxis_tickangle (int, optional): Angle for x ticks. Defaults to 0.
744
+ opacity (float, optional): Opacity of bars. Defaults to 0.8.
745
+ plot_bgcolor (str, optional): Background color for the plot. Defaults to None.
746
+ paper_bgcolor (str, optional): Background color for the area around the plot. Defaults to None.
747
+ template (str, optional): Plotly template. Defaults to "plotly".
748
+
749
+ Returns:
750
+ go.Figure: Plotly figure object.
603
751
  """
604
752
 
605
753
  # nk = np.empty(shape=(len(x), 3, 1), dtype="object")
@@ -760,54 +908,64 @@ def fig_bar_trend(df, col_x, bar_measure, trend_measure, x_name="X", bar_name ="
760
908
  # return fig
761
909
 
762
910
 
763
- def density_map(df_posts,
764
- df_dots,
765
- df_topics,
766
- col_topic,
767
- col_engagement,
768
- col_text,
769
- col_text_dots,
770
- colorscale = "Portland",
771
- marker_color = "#ff7f0e",
772
- arrow_color = "#ff7f0e",
773
- width=1000,
774
- height=1000,
775
- show_text=True,
776
- show_topics=True,
777
- show_halo=False,
778
- show_histogram =True,
779
- label_size_ratio=100,
780
- n_words = 3,
781
- title_text = "Clustering",
782
- max_dots_displayed=0,
783
- max_topics_displayed=20,
784
- opacity=0.3,
785
- plot_bgcolor=None,
786
- paper_bgcolor=None,
787
- template = "plotly"):
911
+ def density_map(df_posts: pd.DataFrame,
912
+ df_dots: pd.DataFrame,
913
+ df_topics: pd.DataFrame,
914
+ col_topic: str,
915
+ col_engagement: str,
916
+ col_text: str,
917
+ col_text_dots: str,
918
+ colorscale: str = "Portland",
919
+ marker_color: str = "#ff7f0e",
920
+ arrow_color: str = "#ff7f0e",
921
+ width: int = 1000,
922
+ height: int = 1000,
923
+ show_text: bool = True,
924
+ show_topics: bool = True,
925
+ show_halo: bool = False,
926
+ show_histogram: bool = True,
927
+ label_size_ratio: int = 100,
928
+ n_words: int = 3,
929
+ title_text: str = "Clustering",
930
+ max_dots_displayed: int = 0,
931
+ max_topics_displayed: int = 20,
932
+ opacity: float = 0.3,
933
+ plot_bgcolor: str = None,
934
+ paper_bgcolor: str = None,
935
+ template: str = "plotly") -> go.Figure:
936
+ """
937
+ Display a 2D histogram with contours and scattered dots.
938
+
939
+ Args:
940
+ df_posts (pd.DataFrame): DataFrame containing all data points to plot (corresponding to contours).
941
+ df_dots (pd.DataFrame): DataFrame containing a sample of points to plot as dots.
942
+ df_topics (pd.DataFrame): DataFrame containing topics representations.
943
+ col_topic (str): Column name corresponding to category.
944
+ col_engagement (str): Column name corresponding to a metric.
945
+ col_text (str): Column name corresponding to a text separated by |.
946
+ col_text_dots (str): Column name corresponding to the text for dots.
947
+ colorscale (str, optional): Possible values are https://plotly.com/python/builtin-colorscales/. Defaults to "Portland".
948
+ marker_color (str, optional): Dots color value. Defaults to "#ff7f0e".
949
+ arrow_color (str, optional): Arrow pointing to topic centroid color value. Defaults to "#ff7f0e".
950
+ width (int, optional): Width of the plot. Defaults to 1000.
951
+ height (int, optional): Height of the plot. Defaults to 1000.
952
+ show_text (bool, optional): Show dots. Defaults to True.
953
+ show_topics (bool, optional): Show topics labels. Defaults to True.
954
+ show_halo (bool, optional): Show circles around topics. Defaults to False.
955
+ show_histogram (bool, optional): Show 2D histogram with contours. Defaults to True.
956
+ label_size_ratio (int, optional): Influence the size of the topics labels. Higher value means smaller topics labels. Defaults to 100.
957
+ n_words (int, optional): Number of words to display. Defaults to 3.
958
+ title_text (str, optional): Graph title. Defaults to "Clustering".
959
+ max_dots_displayed (int, optional): Number of dots to display. Defaults to 0.
960
+ max_topics_displayed (int, optional): Number of topics to display. Defaults to 20.
961
+ opacity (float, optional): Opacity of dots. Defaults to 0.3.
962
+ plot_bgcolor (str, optional): Background color for the plot. Defaults to None.
963
+ paper_bgcolor (str, optional): Background color for the area around the plot. Defaults to None.
964
+ template (str, optional): Plotly template. Defaults to "plotly".
965
+
966
+ Returns:
967
+ go.Figure: Plotly figure object.
788
968
  """
789
- Display a 2Dhistogram with contours :
790
- - df_posts : dataframe containing all data points to plot (corresponding to contours)
791
- - df_dots : dataframe containing a sample of points to plot as dots
792
- - df_topics : dataframe containing topics representations
793
- - col_topic : column name corresponding to category
794
- - col_engagement : column name corresponding to a metric
795
- - col_text : column name corresponding to a text separated by |
796
- - colorscale : possible values are https://plotly.com/python/builtin-colorscales/
797
- - marker_color : dots color value
798
- - arrow_color : arrow pointing to topic centroid color value
799
- - width / height = size of plot
800
- - show_text : show dots
801
- - show_topic : show topics labels
802
- - show_halo : show circles around topics
803
- - show_histogram : show 2Dhistogram with contours
804
- - label_size_ratio : influence the size of the topics labels, higher value means smaller topics labels
805
- - n_words : number of words to display (words should be separated by | in col_text)
806
- - title_text = graph title
807
- - max_dots_displayed : number of dots to display,
808
- - max_topics_displayed : number of topics to display
809
- - opacity : opacity of dots
810
- """
811
969
 
812
970
  # df_topics = df_distrib_sample.copy()
813
971
  df_topics= df_topics.dropna(subset=col_text)
@@ -926,9 +1084,25 @@ def density_map(df_posts,
926
1084
 
927
1085
 
928
1086
 
929
- def topic_heatmap(df, col_x = "topic_x", col_y = "topic_y", col_topic = "soft_topic", color_continuous_scale='GnBu', title ="Similarity between topics"):
1087
+ def topic_heatmap(df: pd.DataFrame,
1088
+ col_x: str = "topic_x",
1089
+ col_y: str = "topic_y",
1090
+ col_topic: str = "soft_topic",
1091
+ color_continuous_scale: str = 'GnBu',
1092
+ title: str = "Similarity between topics") -> go.Figure:
930
1093
  """
931
-
1094
+ Display a heatmap representing the similarity between topics.
1095
+
1096
+ Args:
1097
+ df (pd.DataFrame): DataFrame containing the topic data.
1098
+ col_x (str, optional): Column name for x-axis coordinates. Defaults to "topic_x".
1099
+ col_y (str, optional): Column name for y-axis coordinates. Defaults to "topic_y".
1100
+ col_topic (str, optional): Column name for the topic labels. Defaults to "soft_topic".
1101
+ color_continuous_scale (str, optional): Plotly color scale. Defaults to 'GnBu'.
1102
+ title (str, optional): Title of the heatmap. Defaults to "Similarity between topics".
1103
+
1104
+ Returns:
1105
+ go.Figure: Plotly figure object representing the heatmap.
932
1106
  """
933
1107
 
934
1108
  distance_matrix = cosine_similarity(np.array(df[[col_x,col_y]]))
@@ -963,7 +1137,34 @@ def topic_heatmap(df, col_x = "topic_x", col_y = "topic_y", col_topic = "soft_to
963
1137
  fig.update_layout(legend_title_text='Trend')
964
1138
  return fig
965
1139
 
966
- def generate_wordcloud(df, col_word, col_metric, width=3000, height=1500, dpi=300, background_color='white', font_path = "font/SEGUIEMJ.TTF", colormap="Viridis", show=False):
1140
+ def generate_wordcloud(df: pd.DataFrame,
1141
+ col_word: str,
1142
+ col_metric: str,
1143
+ width: int = 3000,
1144
+ height: int = 1500,
1145
+ dpi: int = 300,
1146
+ background_color: str = 'white',
1147
+ font_path: str = "font/SEGUIEMJ.TTF",
1148
+ colormap: str = "Viridis",
1149
+ show: bool = False) -> WordCloud:
1150
+ """
1151
+ Generate a word cloud from a DataFrame.
1152
+
1153
+ Args:
1154
+ df (pd.DataFrame): DataFrame containing word frequency data.
1155
+ col_word (str): Column name containing words.
1156
+ col_metric (str): Column name containing frequency metrics for each word.
1157
+ width (int, optional): Width of the word cloud image. Defaults to 3000.
1158
+ height (int, optional): Height of the word cloud image. Defaults to 1500.
1159
+ dpi (int, optional): Dots per inch for image resolution. Defaults to 300.
1160
+ background_color (str, optional): Background color of the word cloud image. Defaults to 'white'.
1161
+ font_path (str, optional): Path to the font file to be used in the word cloud. Defaults to "font/SEGUIEMJ.TTF".
1162
+ colormap (str, optional): Colormap for the word cloud image. Defaults to "Viridis".
1163
+ show (bool, optional): Whether to display the word cloud image. Defaults to False.
1164
+
1165
+ Returns:
1166
+ WordCloud: WordCloud object representing the generated word cloud.
1167
+ """
967
1168
 
968
1169
  top_n_words={row[col_word]:row[col_metric] for i,row in df.iterrows()}
969
1170
 
@@ -974,12 +1175,36 @@ def generate_wordcloud(df, col_word, col_metric, width=3000, height=1500, dpi=30
974
1175
  plt.imshow(wordcloud, interpolation='bilinear')
975
1176
  plt.axis('off')
976
1177
  plt.show()
977
-
978
1178
  return wordcloud
979
1179
 
1180
+ def create_radar(df: pd.DataFrame,
1181
+ col_topic: str,
1182
+ col_metrics: list,
1183
+ title: str = "Radar",
1184
+ opacity: float = 0.6,
1185
+ width: int = 1000,
1186
+ height: int = 1000,
1187
+ template: str = "ggplot2",
1188
+ plot_bgcolor: str = None,
1189
+ paper_bgcolor: str = None) -> go.Figure:
1190
+ """
1191
+ Create a radar chart.
980
1192
 
1193
+ Args:
1194
+ df (pd.DataFrame): DataFrame containing data for radar chart.
1195
+ col_topic (str): Column name containing topics.
1196
+ col_metrics (List[str]): List of column names containing metric values.
1197
+ title (str, optional): Title of the radar chart. Defaults to "Radar".
1198
+ opacity (float, optional): Opacity of radar area. Defaults to 0.6.
1199
+ width (int, optional): Width of the radar chart. Defaults to 1000.
1200
+ height (int, optional): Height of the radar chart. Defaults to 1000.
1201
+ template (str, optional): Plotly template to use. Defaults to "ggplot2".
1202
+ plot_bgcolor (Optional[str], optional): Background color of the plot. Defaults to None.
1203
+ paper_bgcolor (Optional[str], optional): Background color of the paper. Defaults to None.
981
1204
 
982
- def create_radar(df, col_topic, col_metrics, title="Radar", opacity=0.6, width = 1000, height= 1000, template = "ggplot2" , plot_bgcolor=None, paper_bgcolor=None):
1205
+ Returns:
1206
+ go.Figure: Plotly Figure object representing the radar chart.
1207
+ """
983
1208
 
984
1209
  df = df[[col_topic] + col_metrics]
985
1210
  col_metrics.append(col_metrics[0])
@@ -1026,12 +1251,54 @@ def create_radar(df, col_topic, col_metrics, title="Radar", opacity=0.6, width =
1026
1251
  template=template,
1027
1252
  margin=dict(l=100, r=100, t=100, b=100)
1028
1253
  )
1254
+ return fig
1029
1255
 
1256
+ def bar_subplots(df: pd.DataFrame,
1257
+ col_x: str,
1258
+ col_y: str,
1259
+ col_cat: str,
1260
+ color_palette: dict = None,
1261
+ n_cols: int = 4,
1262
+ n_top_words: int = 20,
1263
+ horizontal_spacing: float = 0.2,
1264
+ vertical_spacing: float = 0.08,
1265
+ textposition: str = None,
1266
+ color: str = None,
1267
+ title: str = "Top words per topic",
1268
+ template: str = "plotly",
1269
+ bargap: float = 0.4,
1270
+ width: int = 500,
1271
+ height: int = 35,
1272
+ plot_bgcolor: str = None,
1273
+ paper_bgcolor: str = None,
1274
+ showlegend: bool = True) -> go.Figure:
1275
+ """
1276
+ Create subplots of horizontal bar charts.
1030
1277
 
1031
- return fig
1278
+ Args:
1279
+ df (pd.DataFrame): DataFrame containing data for bar charts.
1280
+ col_x (str): Name of the column containing x-axis values.
1281
+ col_y (str): Name of the column containing y-axis values.
1282
+ col_cat (str): Name of the column containing categories.
1283
+ color_palette (Optional[Dict[str, str]], optional): Dictionary mapping categories to colors. Defaults to None.
1284
+ n_cols (int, optional): Number of columns in the subplot grid. Defaults to 4.
1285
+ n_top_words (int, optional): Number of top words to display in each bar chart. Defaults to 20.
1286
+ horizontal_spacing (float, optional): Spacing between subplots horizontally. Defaults to 0.2.
1287
+ vertical_spacing (float, optional): Spacing between subplots vertically. Defaults to 0.08.
1288
+ textposition (Optional[str], optional): Position of the text relative to the bars ('inside', 'outside', or None). Defaults to None.
1289
+ color (Optional[str], optional): Color of the bars. Defaults to None.
1290
+ title (str, optional): Title of the subplot. Defaults to "Top words per topic".
1291
+ template (str, optional): Plotly template to use. Defaults to "plotly".
1292
+ bargap (float, optional): Space between bars in the same cluster. Defaults to 0.4.
1293
+ width (int, optional): Width of each subplot. Defaults to 500.
1294
+ height (int, optional): Height of each bar in the subplot. Defaults to 35.
1295
+ plot_bgcolor (Optional[str], optional): Background color of the plot. Defaults to None.
1296
+ paper_bgcolor (Optional[str], optional): Background color of the paper. Defaults to None.
1297
+ showlegend (bool, optional): Whether to display the legend. Defaults to True.
1032
1298
 
1033
- def bar_subplots(df, col_x, col_y, col_cat, color_palette, n_cols=4, n_top_words = 20, horizontal_spacing = 0.2, vertical_spacing = 0.08, textposition=None, color = None, title = "Top words per topic", template = "plotly", bargap = 0.4, width = 500, height = 35, plot_bgcolor=None, paper_bgcolor=None, showlegend = True):
1034
-
1299
+ Returns:
1300
+ go.Figure: Plotly Figure object representing the subplots of horizontal bar charts.
1301
+ """
1035
1302
  categories = df[col_cat].unique()
1036
1303
 
1037
1304
  # user define a number of columns, we compute the number of rows requires
@@ -1126,8 +1393,44 @@ def bar_subplots(df, col_x, col_y, col_cat, color_palette, n_cols=4, n_top_words
1126
1393
  )
1127
1394
  return fig
1128
1395
 
1129
- def pie_subplots(df, col_x, col_y, col_cat, col_color, n_cols=4, horizontal_spacing = 0.2, vertical_spacing = 0.08, title = "Top words per topic", template = "plotly", width = 500, height = 150, plot_bgcolor=None, paper_bgcolor=None, showlegend = True):
1130
-
1396
+ def pie_subplots(df: pd.DataFrame,
1397
+ col_x: str,
1398
+ col_y: str,
1399
+ col_cat: str,
1400
+ col_color: str,
1401
+ n_cols: int = 4,
1402
+ horizontal_spacing: float = 0.2,
1403
+ vertical_spacing: float = 0.08,
1404
+ title: str = "Top words per topic",
1405
+ template: str = "plotly",
1406
+ width: int = 500,
1407
+ height: int = 150,
1408
+ plot_bgcolor: str = None,
1409
+ paper_bgcolor: str = None,
1410
+ showlegend: bool = True) -> go.Figure:
1411
+ """
1412
+ Create subplots of pie charts.
1413
+
1414
+ Args:
1415
+ df (pd.DataFrame): DataFrame containing data for pie charts.
1416
+ col_x (str): Name of the column containing labels.
1417
+ col_y (str): Name of the column containing values.
1418
+ col_cat (str): Name of the column containing categories.
1419
+ col_color (str): Name of the column containing colors.
1420
+ n_cols (int, optional): Number of columns in the subplot grid. Defaults to 4.
1421
+ horizontal_spacing (float, optional): Spacing between subplots horizontally. Defaults to 0.2.
1422
+ vertical_spacing (float, optional): Spacing between subplots vertically. Defaults to 0.08.
1423
+ title (str, optional): Title of the subplot. Defaults to "Top words per topic".
1424
+ template (str, optional): Plotly template to use. Defaults to "plotly".
1425
+ width (int, optional): Width of each subplot. Defaults to 500.
1426
+ height (int, optional): Height of each subplot. Defaults to 150.
1427
+ plot_bgcolor (Optional[str], optional): Background color of the plot. Defaults to None.
1428
+ paper_bgcolor (Optional[str], optional): Background color of the paper. Defaults to None.
1429
+ showlegend (bool, optional): Whether to display the legend. Defaults to True.
1430
+
1431
+ Returns:
1432
+ go.Figure: Plotly Figure object representing the subplots of pie charts.
1433
+ """
1131
1434
  categories = df[col_cat].unique()
1132
1435
 
1133
1436
  # user define a number of columns, we compute the number of rows requires
@@ -1193,8 +1496,44 @@ def pie_subplots(df, col_x, col_y, col_cat, col_color, n_cols=4, horizontal_spac
1193
1496
  return fig
1194
1497
 
1195
1498
 
1196
- def horizontal_stacked_bars(df, col_x, col_y, col_percentage, col_cat, col_color, title_text = "Sentiment per topic", width=1200, height=1200, xaxis_tickangle=0, horizontal_spacing = 0, vertical_spacing = 0.08, plot_bgcolor=None, paper_bgcolor=None, template = "plotly"):
1499
+ def horizontal_stacked_bars(df: pd.DataFrame,
1500
+ col_x: str,
1501
+ col_y: str,
1502
+ col_percentage: str,
1503
+ col_cat: str,
1504
+ col_color: str,
1505
+ title_text: str = "Sentiment per topic",
1506
+ width: int = 1200,
1507
+ height: int = 1200,
1508
+ xaxis_tickangle: int = 0,
1509
+ horizontal_spacing: float = 0,
1510
+ vertical_spacing: float = 0.08,
1511
+ plot_bgcolor: str = None,
1512
+ paper_bgcolor: str = None,
1513
+ template: str = "plotly") -> go.Figure:
1514
+ """
1515
+ Create horizontal stacked bar plots.
1516
+
1517
+ Args:
1518
+ df (pd.DataFrame): DataFrame containing data for the bar plots.
1519
+ col_x (str): Name of the column containing x-axis values.
1520
+ col_y (str): Name of the column containing y-axis values.
1521
+ col_percentage (str): Name of the column containing percentage values.
1522
+ col_cat (str): Name of the column containing categories.
1523
+ col_color (str): Name of the column containing colors.
1524
+ title_text (str, optional): Title of the plot. Defaults to "Sentiment per topic".
1525
+ width (int, optional): Width of the plot. Defaults to 1200.
1526
+ height (int, optional): Height of the plot. Defaults to 1200.
1527
+ xaxis_tickangle (int, optional): Angle for x-axis ticks. Defaults to 0.
1528
+ horizontal_spacing (float, optional): Spacing between subplots horizontally. Defaults to 0.
1529
+ vertical_spacing (float, optional): Spacing between subplots vertically. Defaults to 0.08.
1530
+ plot_bgcolor (Optional[str], optional): Background color of the plot. Defaults to None.
1531
+ paper_bgcolor (Optional[str], optional): Background color of the paper. Defaults to None.
1532
+ template (str, optional): Plotly template to use. Defaults to "plotly".
1197
1533
 
1534
+ Returns:
1535
+ go.Figure: Plotly Figure object representing the horizontal stacked bar plots.
1536
+ """
1198
1537
  categories = df[col_cat].unique()
1199
1538
 
1200
1539
  n_cols=2
@@ -1271,8 +1610,44 @@ def horizontal_stacked_bars(df, col_x, col_y, col_percentage, col_cat, col_color
1271
1610
 
1272
1611
  return fig
1273
1612
 
1274
- def bar_trend_per_day(df, col_date, col_metric1, col_metric2, xaxis_title = "Date", y1_axis_title = "Verbatims", y2_axis_title = "Engagements", title_text = "Trend - couverture & résonance", width = 1500, height = 700, marker_color = "indianred", line_color = "#273746", plot_bgcolor=None, paper_bgcolor=None, template = "plotly"):
1613
+ def bar_trend_per_day(df: pd.DataFrame,
1614
+ col_date: str,
1615
+ col_metric1: str,
1616
+ col_metric2: str,
1617
+ xaxis_title: str = "Date",
1618
+ y1_axis_title: str = "Verbatims",
1619
+ y2_axis_title: str = "Engagements",
1620
+ title_text: str = "Trend - couverture & résonance",
1621
+ width: int = 1500,
1622
+ height: int = 700,
1623
+ marker_color: str = "indianred",
1624
+ line_color: str = "#273746",
1625
+ plot_bgcolor: str = None,
1626
+ paper_bgcolor: str = None,
1627
+ template: str = "plotly") -> go.Figure:
1628
+ """
1629
+ Creates a Plotly stacked bar chart with a secondary line plot for two metrics over time.
1630
+
1631
+ Parameters:
1632
+ - df (pd.DataFrame): The DataFrame containing the data.
1633
+ - col_date (str): The name of the column containing dates.
1634
+ - col_metric1 (str): The name of the column containing the first metric values.
1635
+ - col_metric2 (str): The name of the column containing the second metric values.
1636
+ - xaxis_title (str, optional): The title for the x-axis. Defaults to "Date".
1637
+ - y1_axis_title (str, optional): The title for the primary y-axis. Defaults to "Verbatims".
1638
+ - y2_axis_title (str, optional): The title for the secondary y-axis. Defaults to "Engagements".
1639
+ - title_text (str, optional): The title text for the chart. Defaults to "Trend - couverture & résonance".
1640
+ - width (int, optional): The width of the chart. Defaults to 1500.
1641
+ - height (int, optional): The height of the chart. Defaults to 700.
1642
+ - marker_color (str, optional): The color of the bars. Defaults to "indianred".
1643
+ - line_color (str, optional): The color of the line plot. Defaults to "#273746".
1644
+ - plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
1645
+ - paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
1646
+ - template (str, optional): The template of the chart. Defaults to "plotly".
1275
1647
 
1648
+ Returns:
1649
+ - fig (go.Figure): The Plotly Figure object representing the stacked bar chart with line plot.
1650
+ """
1276
1651
  # Plotly Stacked Bar Chart
1277
1652
  fig = make_subplots(specs=[[{"secondary_y": True}]])
1278
1653
  hovertemplate='<b>Date :</b>'+ df[col_date].astype(str) + '<br><b>'+y1_axis_title+'</b>:'+ df[col_metric1].astype(str)+ '<br><b>'+y2_axis_title+'</b>:'+ df[col_metric2].astype(int).astype(str)
@@ -1341,8 +1716,46 @@ def bar_trend_per_day(df, col_date, col_metric1, col_metric2, xaxis_title = "Da
1341
1716
 
1342
1717
  return fig
1343
1718
 
1344
- def bar_trend_per_day_per_cat(df, col_date, col_cat, col_metric1, col_metric2, col_color, xaxis_title = "Date", y1_axis_title = "Verbatims", y2_axis_title = "Engagements", title_text = "Trend - couverture & résonance", vertical_spacing = 0.1, width = 1500, height = 700, marker_color = "indianred", line_color = "#273746", plot_bgcolor=None, paper_bgcolor=None, template = "plotly"):
1719
+ def bar_trend_per_day_per_cat(df: pd.DataFrame,
1720
+ col_date: str,
1721
+ col_cat: str,
1722
+ col_metric1: str,
1723
+ col_metric2: str,
1724
+ col_color: str,
1725
+ xaxis_title: str = "Date",
1726
+ y1_axis_title: str = "Verbatims",
1727
+ y2_axis_title: str = "Engagements",
1728
+ title_text: str = "Trend - couverture & résonance",
1729
+ vertical_spacing: float = 0.1,
1730
+ width: int = 1500,
1731
+ height: int = 700,
1732
+ plot_bgcolor: str = None,
1733
+ paper_bgcolor: str = None,
1734
+ template: str = "plotly") -> go.Figure:
1735
+ """
1736
+ Creates a Plotly stacked bar chart with multiple categories, each represented as a separate subplot.
1737
+
1738
+ Parameters:
1739
+ - df (pd.DataFrame): The DataFrame containing the data.
1740
+ - col_date (str): The name of the column containing dates.
1741
+ - col_cat (str): The name of the column containing categories.
1742
+ - col_metric1 (str): The name of the column containing the first metric values.
1743
+ - col_metric2 (str): The name of the column containing the second metric values.
1744
+ - col_color (str): The name of the column containing the color codes for each category.
1745
+ - xaxis_title (str, optional): The title for the x-axis. Defaults to "Date".
1746
+ - y1_axis_title (str, optional): The title for the primary y-axis. Defaults to "Verbatims".
1747
+ - y2_axis_title (str, optional): The title for the secondary y-axis. Defaults to "Engagements".
1748
+ - title_text (str, optional): The title text for the chart. Defaults to "Trend - couverture & résonance".
1749
+ - vertical_spacing (float, optional): The space between subplots. Defaults to 0.1.
1750
+ - width (int, optional): The width of the chart. Defaults to 1500.
1751
+ - height (int, optional): The height of the chart. Defaults to 700.
1752
+ - plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
1753
+ - paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
1754
+ - template (str, optional): The template of the chart. Defaults to "plotly".
1345
1755
 
1756
+ Returns:
1757
+ - fig (go.Figure): The Plotly Figure object representing the stacked bar chart with subplots for each category.
1758
+ """
1346
1759
  fig = make_subplots(
1347
1760
  rows = 2, # number of rows
1348
1761
  cols = 1, # number of columns
@@ -1423,8 +1836,36 @@ def bar_trend_per_day_per_cat(df, col_date, col_cat, col_metric1, col_metric2, c
1423
1836
 
1424
1837
  return fig
1425
1838
 
1426
- def pie(df, col_x, col_y, col_color, title = "Sentiment", template = "plotly", width = 1000, height = 1000, plot_bgcolor=None, paper_bgcolor=None, showlegend = True):
1427
-
1839
+ def pie(df: pd.DataFrame,
1840
+ col_x: str,
1841
+ col_y: str,
1842
+ col_color: str,
1843
+ title: str = "Sentiment",
1844
+ template: str = "plotly",
1845
+ width: int = 1000,
1846
+ height: int = 1000,
1847
+ plot_bgcolor: str = None,
1848
+ paper_bgcolor: str = None,
1849
+ showlegend: bool = True) -> go.Figure:
1850
+ """
1851
+ Creates a Plotly pie chart.
1852
+
1853
+ Parameters:
1854
+ - df (pd.DataFrame): The DataFrame containing the data.
1855
+ - col_x (str): The name of the column containing the labels for the pie chart slices.
1856
+ - col_y (str): The name of the column containing the values for the pie chart slices.
1857
+ - col_color (str): The name of the column containing the colors for the pie chart slices.
1858
+ - title (str, optional): The title for the pie chart. Defaults to "Sentiment".
1859
+ - template (str, optional): The template of the chart. Defaults to "plotly".
1860
+ - width (int, optional): The width of the chart. Defaults to 1000.
1861
+ - height (int, optional): The height of the chart. Defaults to 1000.
1862
+ - plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
1863
+ - paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
1864
+ - showlegend (bool, optional): Whether to show the legend. Defaults to True.
1865
+
1866
+ Returns:
1867
+ - fig (go.Figure): The Plotly Figure object representing the pie chart.
1868
+ """
1428
1869
  fig = go.Figure()
1429
1870
  fig.add_trace(go.Pie(
1430
1871
  labels=df[col_x],
@@ -1463,8 +1904,40 @@ def pie(df, col_x, col_y, col_color, title = "Sentiment", template = "plotly",
1463
1904
  )
1464
1905
  return fig
1465
1906
 
1466
- def bar(df, x, y, color="indianred", xaxis_title="x", yaxis_title="y", width=1200, height = 700, title_text="", plot_bgcolor=None, paper_bgcolor=None, template = "plotly", showlegend=True):
1907
+ def bar(df: pd.DataFrame,
1908
+ x: str,
1909
+ y: str,
1910
+ color: str = "indianred",
1911
+ xaxis_title: str = "x",
1912
+ yaxis_title: str = "y",
1913
+ width: int = 1200,
1914
+ height: int = 700,
1915
+ title_text: str = "",
1916
+ plot_bgcolor: str = None,
1917
+ paper_bgcolor: str = None,
1918
+ template: str = "plotly",
1919
+ showlegend: bool = True) -> go.Figure:
1920
+ """
1921
+ Creates a Plotly vertical bar chart.
1922
+
1923
+ Parameters:
1924
+ - df (pd.DataFrame): The DataFrame containing the data.
1925
+ - x (str): The name of the column containing the x-axis values.
1926
+ - y (str): The name of the column containing the y-axis values.
1927
+ - color (str, optional): The color of the bars. Defaults to "indianred".
1928
+ - xaxis_title (str, optional): The title for the x-axis. Defaults to "x".
1929
+ - yaxis_title (str, optional): The title for the y-axis. Defaults to "y".
1930
+ - width (int, optional): The width of the chart. Defaults to 1200.
1931
+ - height (int, optional): The height of the chart. Defaults to 700.
1932
+ - title_text (str, optional): The title text for the chart. Defaults to "".
1933
+ - plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
1934
+ - paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
1935
+ - template (str, optional): The template of the chart. Defaults to "plotly".
1936
+ - showlegend (bool, optional): Whether to show the legend. Defaults to True.
1467
1937
 
1938
+ Returns:
1939
+ - fig (go.Figure): The Plotly Figure object representing the vertical bar chart.
1940
+ """
1468
1941
  fig = go.Figure()
1469
1942
  fig.add_trace(
1470
1943
  go.Bar(
@@ -1501,7 +1974,28 @@ def bar(df, x, y, color="indianred", xaxis_title="x", yaxis_title="y", width=120
1501
1974
  return fig
1502
1975
 
1503
1976
 
1504
- def add_horizontal_line(fig, y, line_color = "gray", line_width = 1.5, line_dash = "dash", annotation_text = "Longueur moyenne des textes", annotation_position = "top right"):
1977
+ def add_horizontal_line(fig: go.Figure,
1978
+ y: float,
1979
+ line_color: str = "gray",
1980
+ line_width: float = 1.5,
1981
+ line_dash: str = "dash",
1982
+ annotation_text: str = "Longueur moyenne des textes",
1983
+ annotation_position: str = "top right") -> go.Figure:
1984
+ """
1985
+ Adds a horizontal line to a Plotly Figure object.
1986
+
1987
+ Parameters:
1988
+ - fig (go.Figure): The Plotly Figure object to which the horizontal line will be added.
1989
+ - y (float): The y-coordinate of the horizontal line.
1990
+ - line_color (str, optional): The color of the horizontal line. Defaults to "gray".
1991
+ - line_width (float, optional): The width of the horizontal line. Defaults to 1.5.
1992
+ - line_dash (str, optional): The dash style of the horizontal line. Defaults to "dash".
1993
+ - annotation_text (str, optional): The text annotation associated with the horizontal line. Defaults to "Longueur moyenne des textes".
1994
+ - annotation_position (str, optional): The position of the annotation relative to the horizontal line. Defaults to "top right".
1995
+
1996
+ Returns:
1997
+ - fig (go.Figure): The Plotly Figure object with the horizontal line added.
1998
+ """
1505
1999
  fig.add_hline(
1506
2000
  y=y,
1507
2001
  line_width=line_width,
@@ -1512,7 +2006,28 @@ def add_horizontal_line(fig, y, line_color = "gray", line_width = 1.5, line_dash
1512
2006
  )
1513
2007
  return fig
1514
2008
 
1515
- def add_vertical_line(fig, x, line_color = "gray", line_width = 1.5, line_dash = "dash", annotation_text = "Longueur moyenne des textes", annotation_position = "top right"):
2009
+ def add_vertical_line(fig: go.Figure,
2010
+ x: float,
2011
+ line_color: str = "gray",
2012
+ line_width: float = 1.5,
2013
+ line_dash: str = "dash",
2014
+ annotation_text: str = "Longueur moyenne des textes",
2015
+ annotation_position: str = "top right") -> go.Figure:
2016
+ """
2017
+ Adds a vertical line to a Plotly Figure object.
2018
+
2019
+ Parameters:
2020
+ - fig (go.Figure): The Plotly Figure object to which the vertical line will be added.
2021
+ - x (float): The x-coordinate of the vertical line.
2022
+ - line_color (str, optional): The color of the vertical line. Defaults to "gray".
2023
+ - line_width (float, optional): The width of the vertical line. Defaults to 1.5.
2024
+ - line_dash (str, optional): The dash style of the vertical line. Defaults to "dash".
2025
+ - annotation_text (str, optional): The text annotation associated with the vertical line. Defaults to "Longueur moyenne des textes".
2026
+ - annotation_position (str, optional): The position of the annotation relative to the vertical line. Defaults to "top right".
2027
+
2028
+ Returns:
2029
+ - fig (go.Figure): The Plotly Figure object with the vertical line added.
2030
+ """
1516
2031
  fig.add_vline(
1517
2032
  x=x,
1518
2033
  line_width=line_width,
@@ -1523,9 +2038,50 @@ def add_vertical_line(fig, x, line_color = "gray", line_width = 1.5, line_dash =
1523
2038
  )
1524
2039
  return fig
1525
2040
 
1526
- def network_graph(T, col_size="scaled_size", col_color="modularity_color", title_text = "Analyse de similitudes", sample_nodes = 0.15, show_edges=True, show_halo=False, textposition=None, line_color = "#B7B7B7", line_dash="dot", edge_mode = "lines+markers", node_mode="markers+text", opacity=0.2, width=1600, height=1200, plot_bgcolor=None, paper_bgcolor=None, template="plotly"):
1527
-
1528
-
2041
+ def network_graph(T: nx.Graph,
2042
+ col_size: str = "scaled_size",
2043
+ col_color: str = "modularity_color",
2044
+ title_text: str = "Analyse de similitudes",
2045
+ sample_nodes: float = 0.15,
2046
+ show_edges: bool = True,
2047
+ show_halo: bool = False,
2048
+ textposition: str = None,
2049
+ line_color: str = "#B7B7B7",
2050
+ line_dash: str = "dot",
2051
+ edge_mode: str = "lines+markers",
2052
+ node_mode: str = "markers+text",
2053
+ opacity: float = 0.2,
2054
+ width: int = 1600,
2055
+ height: int = 1200,
2056
+ plot_bgcolor: str = None,
2057
+ paper_bgcolor: str = None,
2058
+ template: str = "plotly") -> go.Figure:
2059
+ """
2060
+ Creates a network graph visualization using Plotly.
2061
+
2062
+ Parameters:
2063
+ - T (nx.Graph): The NetworkX graph object.
2064
+ - col_size (str, optional): The column name for node size. Defaults to "scaled_size".
2065
+ - col_color (str, optional): The column name for node color. Defaults to "modularity_color".
2066
+ - title_text (str, optional): The title for the graph. Defaults to "Analyse de similitudes".
2067
+ - sample_nodes (float, optional): The proportion of nodes to sample for displaying labels. Defaults to 0.15.
2068
+ - show_edges (bool, optional): Whether to display edges. Defaults to True.
2069
+ - show_halo (bool, optional): Whether to display halo around nodes. Defaults to False.
2070
+ - textposition (str, optional): The position of node labels. Defaults to None.
2071
+ - line_color (str, optional): The color of edges. Defaults to "#B7B7B7".
2072
+ - line_dash (str, optional): The dash style of edges. Defaults to "dot".
2073
+ - edge_mode (str, optional): The mode for displaying edges. Defaults to "lines+markers".
2074
+ - node_mode (str, optional): The mode for displaying nodes. Defaults to "markers+text".
2075
+ - opacity (float, optional): The opacity of nodes. Defaults to 0.2.
2076
+ - width (int, optional): The width of the plot. Defaults to 1600.
2077
+ - height (int, optional): The height of the plot. Defaults to 1200.
2078
+ - plot_bgcolor (str, optional): The background color of the plot area. Defaults to None.
2079
+ - paper_bgcolor (str, optional): The background color of the paper area. Defaults to None.
2080
+ - template (str, optional): The template of the plot. Defaults to "plotly".
2081
+
2082
+ Returns:
2083
+ - fig (go.Figure): The Plotly Figure object representing the network graph visualization.
2084
+ """
1529
2085
  # on construit un dataframe des noeuds à partir des données du graphe pour plus de simplicité
1530
2086
  df_nodes=pd.DataFrame()
1531
2087
  for node in T.nodes(data=True):
@@ -1638,7 +2194,24 @@ def network_graph(T, col_size="scaled_size", col_color="modularity_color", titl
1638
2194
 
1639
2195
  return fig
1640
2196
 
1641
- def richesse_lexicale(df, title= "Richesse lexicale", width=1200, height=1000, template="plotly"):
2197
+ def richesse_lexicale(df: pd.DataFrame,
2198
+ title: str = "Richesse lexicale",
2199
+ width: int = 1200,
2200
+ height: int = 1000,
2201
+ template: str = "plotly") -> go.Figure:
2202
+ """
2203
+ Creates a lexical richness visualization using Plotly.
2204
+
2205
+ Parameters:
2206
+ - df (pd.DataFrame): The DataFrame containing word frequency data.
2207
+ - title (str, optional): The title for the plot. Defaults to "Richesse lexicale".
2208
+ - width (int, optional): The width of the plot. Defaults to 1200.
2209
+ - height (int, optional): The height of the plot. Defaults to 1000.
2210
+ - template (str, optional): The template of the plot. Defaults to "plotly".
2211
+
2212
+ Returns:
2213
+ - fig_richesse (go.Figure): The Plotly Figure object representing the lexical richness visualization.
2214
+ """
1642
2215
  df = create_frequency_table(df, "freq")
1643
2216
  fig_richesse = go.Figure()
1644
2217
  fig_richesse.add_trace(
@@ -1659,7 +2232,26 @@ def richesse_lexicale(df, title= "Richesse lexicale", width=1200, height=1000, t
1659
2232
  fig_richesse.update_yaxes(tickformat=".0f", title_text="Freq", type="log")
1660
2233
  return fig_richesse
1661
2234
 
1662
- def richesse_lexicale_per_topic(df, col_topic, title= "Richesse lexicale par topic", width=1200, height=1000, template="plotly"):
2235
+ def richesse_lexicale_per_topic(df: pd.DataFrame,
2236
+ col_topic: str,
2237
+ title: str = "Richesse lexicale par topic",
2238
+ width: int = 1200,
2239
+ height: int = 1000,
2240
+ template: str = "plotly") -> go.Figure:
2241
+ """
2242
+ Creates a lexical richness visualization per topic using Plotly.
2243
+
2244
+ Parameters:
2245
+ - df (pd.DataFrame): The DataFrame containing word frequency data.
2246
+ - col_topic (str): The name of the column representing topics.
2247
+ - title (str, optional): The title for the plot. Defaults to "Richesse lexicale par topic".
2248
+ - width (int, optional): The width of the plot. Defaults to 1200.
2249
+ - height (int, optional): The height of the plot. Defaults to 1000.
2250
+ - template (str, optional): The template of the plot. Defaults to "plotly".
2251
+
2252
+ Returns:
2253
+ - fig_richesse (go.Figure): The Plotly Figure object representing the lexical richness visualization per topic.
2254
+ """
1663
2255
  fig_richesse = go.Figure()
1664
2256
  for topic in list(df[col_topic].unique()):
1665
2257
  df_tmp = create_frequency_table(df[df[col_topic]==topic], "freq")
@@ -1681,8 +2273,42 @@ def richesse_lexicale_per_topic(df, col_topic, title= "Richesse lexicale par top
1681
2273
  fig_richesse.update_yaxes(tickformat=".0f", title_text="Freq", type="log")
1682
2274
  return fig_richesse
1683
2275
 
1684
- def subplots_bar_per_day_per_cat(df, col_date, col_cat, metrics, col_color, y_axis_titles, xaxis_title = "Date",title_text = "Trend - couverture & résonance", vertical_spacing = 0.1, width = 1500, height = 700, marker_color = "indianred", line_color = "#273746", plot_bgcolor=None, paper_bgcolor=None, template = "plotly"):
2276
+ def subplots_bar_per_day_per_cat(df: pd.DataFrame,
2277
+ col_date: str,
2278
+ col_cat: str,
2279
+ metrics: list,
2280
+ col_color: str,
2281
+ y_axis_titles: list,
2282
+ xaxis_title: str = "Date",
2283
+ title_text: str = "Trend - couverture & résonance",
2284
+ vertical_spacing: float = 0.1,
2285
+ width: int = 1500,
2286
+ height: int = 700,
2287
+ plot_bgcolor: str = None,
2288
+ paper_bgcolor: str = None,
2289
+ template: str = "plotly") -> go.Figure:
2290
+ """
2291
+ Creates subplots of stacked bar charts per day and category using Plotly.
2292
+
2293
+ Parameters:
2294
+ - df (pd.DataFrame): The DataFrame containing the data.
2295
+ - col_date (str): The name of the column representing dates.
2296
+ - col_cat (str): The name of the column representing categories.
2297
+ - metrics (List[str]): A list of column names representing metrics to be plotted.
2298
+ - col_color (str): The name of the column representing colors for bars.
2299
+ - y_axis_titles (List[str]): A list of titles for the y-axes of subplots.
2300
+ - xaxis_title (str, optional): The title for the x-axis. Defaults to "Date".
2301
+ - title_text (str, optional): The title for the entire plot. Defaults to "Trend - couverture & résonance".
2302
+ - vertical_spacing (float, optional): The space between subplots. Defaults to 0.1.
2303
+ - width (int, optional): The width of the entire plot. Defaults to 1500.
2304
+ - height (int, optional): The height of each subplot. Defaults to 700.
2305
+ - plot_bgcolor (str, optional): The background color for the plot area. Defaults to None.
2306
+ - paper_bgcolor (str, optional): The background color for the paper area. Defaults to None.
2307
+ - template (str, optional): The template of the plot. Defaults to "plotly".
1685
2308
 
2309
+ Returns:
2310
+ - fig (go.Figure): The Plotly Figure object representing the subplots of stacked bar charts.
2311
+ """
1686
2312
  fig = make_subplots(
1687
2313
  rows = len(metrics), # number of rows
1688
2314
  cols = 1, # number of columns
@@ -1758,7 +2384,38 @@ def subplots_bar_per_day_per_cat(df, col_date, col_cat, metrics, col_color, y_ax
1758
2384
  return fig
1759
2385
 
1760
2386
 
1761
- def add_shape(fig, shape_type = "rect", x0= -1, y0= -1, x1 = 0, y1=0, fillcolor= 'Silver', opacity = 0.1, line_width = 0, line_color = 'white', dash = None, layer = "below"):
2387
+ def add_shape(fig: go.Figure,
2388
+ shape_type: str = "rect",
2389
+ x0: float = -1,
2390
+ y0: float = -1,
2391
+ x1: float = 0,
2392
+ y1: float = 0,
2393
+ fillcolor: str = 'Silver',
2394
+ opacity: float = 0.1,
2395
+ line_width: float = 0,
2396
+ line_color: str = 'white',
2397
+ dash: str = None,
2398
+ layer: str = "below") -> go.Figure:
2399
+ """
2400
+ Adds a shape to a Plotly figure.
2401
+
2402
+ Parameters:
2403
+ - fig (go.Figure): The Plotly Figure object.
2404
+ - shape_type (str, optional): The type of shape to add. Defaults to "rect".
2405
+ - x0 (float, optional): The x-coordinate of the lower left corner of the shape. Defaults to -1.
2406
+ - y0 (float, optional): The y-coordinate of the lower left corner of the shape. Defaults to -1.
2407
+ - x1 (float, optional): The x-coordinate of the upper right corner of the shape. Defaults to 0.
2408
+ - y1 (float, optional): The y-coordinate of the upper right corner of the shape. Defaults to 0.
2409
+ - fillcolor (str, optional): The fill color of the shape. Defaults to 'Silver'.
2410
+ - opacity (float, optional): The opacity of the shape. Defaults to 0.1.
2411
+ - line_width (float, optional): The width of the shape's outline. Defaults to 0.
2412
+ - line_color (str, optional): The color of the shape's outline. Defaults to 'white'.
2413
+ - dash (str, optional): The dash style of the shape's outline. Defaults to None.
2414
+ - layer (str, optional): The layer on which the shape is added, either 'below' or 'above' the data. Defaults to "below".
2415
+
2416
+ Returns:
2417
+ - fig (go.Figure): The modified Plotly Figure object with the added shape.
2418
+ """
1762
2419
  fig.add_shape(
1763
2420
  # Shape for the area between (-1, 0)
1764
2421
  {
@@ -1780,7 +2437,34 @@ def add_shape(fig, shape_type = "rect", x0= -1, y0= -1, x1 = 0, y1=0, fillcolor=
1780
2437
  )
1781
2438
  return fig
1782
2439
 
1783
- def add_image(fig, xref = "paper", yref = "paper", x = 0, y=0, sizex = 0.08, sizey=0.08, xanchor="right", yanchor="bottom", source = ""):
2440
+ def add_image(fig: go.Figure,
2441
+ xref: str = "paper",
2442
+ yref: str = "paper",
2443
+ x: float = 0,
2444
+ y: float = 0,
2445
+ sizex: float = 0.08,
2446
+ sizey: float = 0.08,
2447
+ xanchor: str = "right",
2448
+ yanchor: str = "bottom",
2449
+ source: str = "") -> go.Figure:
2450
+ """
2451
+ Adds an image to a Plotly figure.
2452
+
2453
+ Parameters:
2454
+ - fig (go.Figure): The Plotly Figure object.
2455
+ - xref (str, optional): The x-coordinate reference point. Defaults to "paper".
2456
+ - yref (str, optional): The y-coordinate reference point. Defaults to "paper".
2457
+ - x (float, optional): The x-coordinate of the image position. Defaults to 0.
2458
+ - y (float, optional): The y-coordinate of the image position. Defaults to 0.
2459
+ - sizex (float, optional): The size of the image in the x-direction. Defaults to 0.08.
2460
+ - sizey (float, optional): The size of the image in the y-direction. Defaults to 0.08.
2461
+ - xanchor (str, optional): The x-coordinate anchor point. Defaults to "right".
2462
+ - yanchor (str, optional): The y-coordinate anchor point. Defaults to "bottom".
2463
+ - source (str, optional): The URL source of the image. Defaults to "" (empty string).
2464
+
2465
+ Returns:
2466
+ - fig (go.Figure): The modified Plotly Figure object with the added image.
2467
+ """
1784
2468
  fig.add_layout_image(
1785
2469
  dict(
1786
2470
  source=source,