convoviz 0.1.7__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
convoviz/configuration.py DELETED
@@ -1,125 +0,0 @@
1
- """Module for handling user configuration and updating the models."""
2
-
3
- from __future__ import annotations
4
-
5
- from questionary import (
6
- Choice,
7
- Style,
8
- checkbox,
9
- select,
10
- )
11
- from questionary import (
12
- path as qst_path,
13
- )
14
- from questionary import (
15
- text as qst_text,
16
- )
17
-
18
- from .models import Conversation, Message
19
- from .utils import (
20
- DEFAULT_USER_CONFIGS,
21
- colormaps,
22
- font_names,
23
- font_path,
24
- stem,
25
- validate_header,
26
- validate_zip,
27
- )
28
-
29
# Colour rules shared by every questionary prompt in this module.
# Each entry maps a prompt element (style class) to its style string; the
# mapping's insertion order is preserved when it is turned into the rule list.
CUSTOM_STYLE = Style(
    list(
        {
            "qmark": "fg:#34eb9b bold",
            "question": "bold fg:#e0e0e0",
            "answer": "fg:#34ebeb bold",
            "pointer": "fg:#e834eb bold",
            "highlighted": "fg:#349ceb bold",
            "selected": "fg:#34ebeb",
            "separator": "fg:#eb3434",
            "instruction": "fg:#eb9434",
            "text": "fg:#b2eb34",
            "disabled": "fg:#858585 italic",
        }.items(),
    ),
)
43
-
44
-
45
class UserConfigs:
    """Collect user preferences interactively and apply them to the models.

    ``configs`` starts out as an independent deep copy of
    ``DEFAULT_USER_CONFIGS``; ``prompt`` overwrites its entries with the
    user's answers and ``set_model_configs`` forwards the relevant
    sub-sections to ``Message`` and ``Conversation``.
    """

    def __init__(self) -> None:
        """Initialize the configs from a private copy of the defaults."""
        from copy import deepcopy

        # ``prompt`` assigns into nested dictionaries (for example
        # ``configs["message"]["author_headers"]``). A shallow ``.copy()``
        # would share those nested dictionaries with DEFAULT_USER_CONFIGS,
        # so answering the prompts would silently rewrite the defaults.
        self.configs = deepcopy(DEFAULT_USER_CONFIGS)

        # will implement a way to read from a config file later ...

    def prompt(self) -> None:
        """Ask the user for every setting and store the answers in ``configs``.

        Each question is pre-filled with the value currently held in
        ``configs``. The answers are written back in place.
        """
        lookup = self.configs

        # Input archive and output location.
        lookup["zip_filepath"] = qst_path(
            "Enter the path to the zip file :",
            lookup["zip_filepath"],
            validate=validate_zip,
            style=CUSTOM_STYLE,
        ).ask()

        lookup["output_folder"] = qst_path(
            "Enter the path to the output folder :",
            lookup["output_folder"],
            style=CUSTOM_STYLE,
        ).ask()

        # One header question per author role. Only existing keys are
        # reassigned, so the dictionary does not change size while iterating.
        for author_role in lookup["message"]["author_headers"]:
            lookup["message"]["author_headers"][author_role] = qst_text(
                f"Enter the message header (#) for messages from '{author_role}' :",
                lookup["message"]["author_headers"][author_role],
                validate=validate_header,
                style=CUSTOM_STYLE,
            ).ask()

        lookup["conversation"]["markdown"]["latex_delimiters"] = select(
            "Select the LaTeX math delimiters you want to use :",
            ["default", "dollars"],
            lookup["conversation"]["markdown"]["latex_delimiters"],
            style=CUSTOM_STYLE,
        ).ask()

        # YAML headers: show every known header, pre-checked according to its
        # current flag, then store whether each one was selected.
        yaml_choices = [
            Choice(title=header, checked=value)
            for header, value in lookup["conversation"]["yaml"].items()
        ]

        selected_headers = checkbox(
            "Select the YAML metadata headers you want to include :",
            yaml_choices,
            style=CUSTOM_STYLE,
        ).ask()

        for header in lookup["conversation"]["yaml"]:
            lookup["conversation"]["yaml"][header] = header in selected_headers

        # Word cloud appearance. The font is chosen by name and stored as a
        # path string.
        font_name: str = select(
            "Select the font you want to use for the word clouds :",
            font_names(),
            stem(lookup["wordcloud"].get("font_path") or ""),
            style=CUSTOM_STYLE,
        ).ask()

        lookup["wordcloud"]["font_path"] = str(font_path(font_name))

        lookup["wordcloud"]["colormap"] = select(
            "Select the color theme you want to use for the word clouds :",
            colormaps(),
            lookup["wordcloud"].get("colormap"),
            style=CUSTOM_STYLE,
        ).ask()

        lookup["wordcloud"]["custom_stopwords"] = qst_text(
            "Enter custom stopwords (separated by commas) :",
            lookup["wordcloud"].get("custom_stopwords", ""),
            style=CUSTOM_STYLE,
        ).ask()

    def set_model_configs(self) -> None:
        """Push the ``message`` and ``conversation`` sections to the models."""
        Message.update_configs(self.configs["message"])
        Conversation.update_configs(self.configs["conversation"])
convoviz/data_analysis.py DELETED
@@ -1,119 +0,0 @@
1
- """Module for all the data visualizations.
2
-
3
- Should ideally only return matplotlib objects, and not deal with the filesystem.
4
- """
5
-
6
- # pyright: reportUnknownMemberType = false
7
-
8
- from __future__ import annotations
9
-
10
- from collections import defaultdict
11
- from datetime import datetime, timezone
12
- from typing import TYPE_CHECKING
13
-
14
- from matplotlib.figure import Figure
15
- from nltk import download as nltk_download # type: ignore[import-untyped]
16
- from nltk.corpus import stopwords as nltk_stopwords # type: ignore[import-untyped]
17
- from nltk.data import find as nltk_find # type: ignore[import-untyped]
18
- from wordcloud import WordCloud # type: ignore[import-untyped]
19
-
20
- from .utils import DEFAULT_WORDCLOUD_CONFIGS
21
-
22
- if TYPE_CHECKING:
23
- from PIL.Image import Image
24
- from typing_extensions import Unpack
25
-
26
- from .utils import GraphKwargs, WordCloudKwargs
27
-
28
-
29
def generate_week_barplot(
    timestamps: list[float],
    title: str,
    **kwargs: Unpack[GraphKwargs],
) -> Figure:
    """Plot how many timestamps fall on each weekday.

    Every timestamp is converted to a UTC datetime and counted under its
    weekday name, so the whole range is collapsed onto a single week.
    ``kwargs`` is accepted but not used by this function.
    """
    weekday_names = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]

    moments = [datetime.fromtimestamp(stamp, timezone.utc) for stamp in timestamps]

    # Start every weekday at zero so days without prompts still get a bar.
    tally = dict.fromkeys(weekday_names, 0)
    for moment in moments:
        tally[weekday_names[moment.weekday()]] += 1

    heights = [tally[name] for name in weekday_names]

    figure = Figure(dpi=300)
    axes = figure.add_subplot()

    axes.bar(weekday_names, heights)
    axes.set_xlabel("Weekday")
    axes.set_ylabel("Prompt Count")

    axes.set_title(title)

    axes.set_xticks(weekday_names)
    axes.set_xticklabels(weekday_names, rotation=45)
    figure.tight_layout()

    return figure
68
-
69
-
70
# Make sure the stopwords corpus is available before reading from it
def _load_nltk_stopwords() -> set[str]:
    """Return the combined nltk stopwords of all supported languages.

    The corpus is downloaded first when nltk cannot find it locally.
    """
    try:
        nltk_find("corpora/stopwords")
    except LookupError:
        nltk_download("stopwords")

    supported_languages = (
        "arabic",
        "english",
        "french",
        "german",
        "spanish",
        "portuguese",
    )  # add more languages here ...

    combined: set[str] = set()
    for language in supported_languages:
        combined.update(nltk_stopwords.words(fileids=language))

    return combined
88
-
89
-
90
def generate_wordcloud(
    text: str,
    **kwargs: Unpack[WordCloudKwargs],
) -> Image:
    """Render ``text`` as a word cloud image.

    Settings come from ``DEFAULT_WORDCLOUD_CONFIGS`` overridden by ``kwargs``.
    The nltk stopwords plus any comma-separated ``custom_stopwords`` are
    excluded from the cloud.
    """
    settings = DEFAULT_WORDCLOUD_CONFIGS.copy()
    settings.update(kwargs)

    base_stopwords = _load_nltk_stopwords()

    # Custom stopwords arrive as one comma-separated string; blank pieces are
    # dropped and the rest are trimmed and lower-cased.
    raw_custom = settings.get("custom_stopwords")
    pieces = raw_custom.split(sep=",") if raw_custom else []
    extra_stopwords = {piece.strip().lower() for piece in pieces if piece.strip()}

    excluded = base_stopwords.union(extra_stopwords)

    cloud = WordCloud(
        font_path=settings.get("font_path"),
        width=settings.get("width"),  # pyright: ignore[reportGeneralTypeIssues]
        height=settings.get("height"),  # pyright: ignore[reportGeneralTypeIssues]
        stopwords=excluded,  # pyright: ignore[reportGeneralTypeIssues]
        background_color=settings.get("background_color"),  # pyright: ignore[reportGeneralTypeIssues]
        mode=settings.get("mode"),  # pyright: ignore[reportGeneralTypeIssues]
        colormap=settings.get("colormap"),
        include_numbers=settings.get("include_numbers"),  # pyright: ignore[reportGeneralTypeIssues]
    )
    rendered = cloud.generate(text)

    return rendered.to_image()
convoviz/long_runs.py DELETED
@@ -1,93 +0,0 @@
1
- """Module for various processes that are used in the controllers."""
2
-
3
- from __future__ import annotations
4
-
5
- from pathlib import Path
6
- from typing import TYPE_CHECKING
7
-
8
- from tqdm import tqdm
9
-
10
- if TYPE_CHECKING:
11
- from typing_extensions import Unpack
12
-
13
- from .models import ConversationSet
14
- from .utils import GraphKwargs, WordCloudKwargs
15
-
16
-
17
def generate_week_barplots(
    conv_set: ConversationSet,
    dir_path: Path | str,
    *,
    progress_bar: bool = False,
    **kwargs: Unpack[GraphKwargs],
) -> None:
    """Save one weekday bar plot per month and per year into ``dir_path``.

    ``progress_bar`` toggles the tqdm progress display; ``kwargs`` is passed
    on to each group's ``week_barplot``.
    """
    target_dir = Path(dir_path)

    monthly = conv_set.group_by_month()
    yearly = conv_set.group_by_year()

    # (groups, progress description, plot title format, file name format)
    jobs = (
        (monthly, "Creating monthly weekwise graphs 📈 ", "%B '%y", "%Y %B"),
        (yearly, "Creating yearly weekwise graphs 📈 ", "%Y", "%Y"),
    )

    for groups, description, title_format, name_format in jobs:
        for period in tqdm(groups.keys(), description, disable=not progress_bar):
            title = period.strftime(title_format)
            figure = groups[period].week_barplot(title, **kwargs)
            figure.savefig(  # pyright: ignore [reportUnknownMemberType]
                target_dir / f"{period.strftime(name_format)}.png",
            )
49
-
50
-
51
def generate_wordclouds(
    conv_set: ConversationSet,
    dir_path: Path | str,
    *,
    progress_bar: bool = False,
    **kwargs: Unpack[WordCloudKwargs],
) -> None:
    """Save one word cloud per week, month and year into ``dir_path``.

    ``progress_bar`` toggles the tqdm progress display; ``kwargs`` is passed
    on to each group's ``wordcloud``.
    """
    target_dir = Path(dir_path)

    weekly = conv_set.group_by_week()
    monthly = conv_set.group_by_month()
    yearly = conv_set.group_by_year()

    # (groups, progress description, file name format)
    jobs = (
        (weekly, "Creating weekly wordclouds 🔡☁️ ", "%Y week %W"),
        (monthly, "Creating monthly wordclouds 🔡☁️ ", "%Y %B"),
        (yearly, "Creating yearly wordclouds 🔡☁️ ", "%Y"),
    )

    for groups, description, name_format in jobs:
        for period in tqdm(groups.keys(), description, disable=not progress_bar):
            image = groups[period].wordcloud(**kwargs)
            image.save(
                target_dir / f"{period.strftime(name_format)}.png",
                optimize=True,
            )
@@ -1,289 +0,0 @@
1
- """Conversation model. Represents a single ChatGPT chat.
2
-
3
- object path : conversations.json -> conversation (one of the list items)
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- from datetime import datetime, timedelta
9
- from os import utime as os_utime
10
- from pathlib import Path
11
- from typing import TYPE_CHECKING, Any, ClassVar
12
-
13
- from orjson import loads
14
- from pydantic import BaseModel
15
-
16
- from convoviz.data_analysis import generate_wordcloud
17
- from convoviz.utils import (
18
- DEFAULT_CONVERSATION_CONFIGS,
19
- ConversationConfigs,
20
- WordCloudKwargs,
21
- close_code_blocks,
22
- replace_latex_delimiters,
23
- sanitize,
24
- )
25
-
26
- from ._node import Node
27
-
28
- if TYPE_CHECKING:
29
- from PIL.Image import Image
30
- from typing_extensions import Unpack
31
-
32
- from ._message import AuthorRole
33
-
34
-
35
class Conversation(BaseModel):
    """Model of one `conversation` item from a ChatGPT `json` export."""

    # Class-wide settings, shared by every instance and changed through
    # ``update_configs``.
    __configs: ClassVar[ConversationConfigs] = DEFAULT_CONVERSATION_CONFIGS

    title: str
    create_time: datetime
    update_time: datetime
    mapping: dict[str, Node]
    moderation_results: list[Any]
    current_node: str
    plugin_ids: list[str] | None = None
    conversation_id: str
    conversation_template_id: str | None = None
    id: str | None = None  # noqa: A003

    @classmethod
    def update_configs(cls, configs: ConversationConfigs) -> None:
        """Merge ``configs`` into the settings shared by all conversations."""
        cls.__configs.update(configs)

    @classmethod
    def from_json(cls, filepath: Path | str) -> Conversation:
        """Build a conversation from the JSON file at ``filepath``."""
        source = Path(filepath)

        with source.open(encoding="utf-8") as handle:
            payload = loads(handle.read())

        return cls(**payload)

    @property
    def node_mapping(self) -> dict[str, Node]:
        """Connected Node objects built from ``mapping`` by ``Node.mapping``."""
        return Node.mapping(self.mapping)

    @property
    def _all_message_nodes(self) -> list[Node]:
        """Every node that carries a message, across all branches."""
        return [
            candidate
            for candidate in self.node_mapping.values()
            if candidate.message
        ]

    def _author_nodes(
        self,
        *authors: AuthorRole,
    ) -> list[Node]:
        """Nodes whose message author role is in ``authors`` (all branches).

        Falls back to the ``"user"`` role when no role is given.
        """
        wanted = authors or ("user",)
        return [
            candidate
            for candidate in self._all_message_nodes
            if candidate.message and candidate.message.author.role in wanted
        ]

    @property
    def leaf_count(self) -> int:
        """Number of message nodes that have no children."""
        leaves = [
            candidate
            for candidate in self._all_message_nodes
            if not candidate.children_nodes
        ]
        return len(leaves)

    @property
    def url(self) -> str:
        """Chat URL built from the conversation id."""
        return f"https://chat.openai.com/c/{self.conversation_id}"

    @property
    def content_types(self) -> list[str]:
        """Distinct content types found in the conversation (all branches)."""
        distinct = {
            candidate.message.content.content_type
            for candidate in self._all_message_nodes
            if candidate.message
        }
        return list(distinct)

    def message_count(
        self,
        *authors: AuthorRole,
    ) -> int:
        """Number of messages from the given author roles (all branches).

        Counts ``"user"`` messages when no role is given.
        """
        wanted = authors or ("user",)
        return len(self._author_nodes(*wanted))

    @property
    def model(self) -> str | None:
        """Model slug of the first assistant message, or None if unavailable."""
        assistant_nodes: list[Node] = self._author_nodes("assistant")
        if not assistant_nodes:
            return None

        first_reply = assistant_nodes[0].message
        if not first_reply:
            return None

        return first_reply.metadata.model_slug

    @property
    def plugins(self) -> list[str]:
        """Distinct plugin namespaces invoked by tool messages."""
        namespaces = {
            candidate.message.metadata.invoked_plugin["namespace"]
            for candidate in self._author_nodes("tool")
            if candidate.message and candidate.message.metadata.invoked_plugin
        }
        return list(namespaces)

    @property
    def custom_instructions(self) -> dict[str, str]:
        """Custom instructions read from the second system message, if any."""
        system_nodes = self._author_nodes("system")

        if len(system_nodes) >= 2:
            context_message = system_nodes[1].message
            if context_message and context_message.metadata.is_user_system_message:
                return context_message.metadata.user_context_message_data or {}

        return {}

    # TODO: check if this is the same for conversations from the bookmarklet

    @property
    def yaml(self) -> str:
        """YAML metadata header, limited to the entries enabled in the configs.

        Returns an empty string when every entry is disabled.
        """
        enabled = self.__configs["yaml"]

        entries = {
            "title": self.title,
            "chat_link": self.url,
            "create_time": self.create_time,
            "update_time": self.update_time,
            "model": self.model,
            "used_plugins": self.plugins,
            "message_count": self.message_count("user", "assistant"),
            "content_types": self.content_types,
            "custom_instructions": self.custom_instructions,
        }

        # Entries missing from the config count as enabled.
        lines = [
            f"{key}: {value}\n"
            for key, value in entries.items()
            if enabled.get(key, True)
        ]

        if not lines:
            return ""

        return "---\n" + "".join(lines) + "---\n"

    @property
    def markdown(self) -> str:
        """Full markdown text: the YAML header followed by every message."""
        use_dollars = self.__configs["markdown"]["latex_delimiters"] == "dollars"

        sections = [self.yaml]

        for node in self._all_message_nodes:
            if not node.message:
                continue

            body = close_code_blocks(node.message.text)
            # prevent empty messages from taking up white space
            body = f"\n{body}\n" if body else ""
            if use_dollars:
                body = replace_latex_delimiters(body)

            sections.append(f"\n{node.header}{body}{node.footer}\n---\n")

        return "".join(sections)

    def save(self, filepath: Path | str) -> None:
        """Write the markdown to ``filepath`` and stamp it with ``update_time``.

        When the path already exists, " (1)", " (2)", ... is appended to the
        sanitized file stem until an unused name is found.
        """
        target = Path(filepath)
        safe_stem = sanitize(target.stem)

        attempt = 0
        while target.exists():
            attempt += 1
            target = target.with_name(f"{safe_stem} ({attempt}){target.suffix}")

        with target.open("w", encoding="utf-8") as handle:
            handle.write(self.markdown)

        # Access and modification times both mirror the conversation's update.
        stamp = self.update_time.timestamp()
        os_utime(target, (stamp, stamp))

    def timestamps(
        self,
        *authors: AuthorRole,
    ) -> list[float]:
        """Creation timestamps of messages from the given roles (all branches).

        Defaults to the ``"user"`` role. Handy for time graphs.
        """
        wanted = authors or ("user",)
        return [
            candidate.message.create_time.timestamp()
            for candidate in self._author_nodes(*wanted)
            if candidate.message and candidate.message.create_time
        ]

    def plaintext(
        self,
        *authors: AuthorRole,
    ) -> str:
        """Newline-joined text of messages from the given roles (all branches).

        Defaults to the ``"user"`` role. Handy for word clouds.
        """
        wanted = authors or ("user",)
        texts = [
            candidate.message.text
            for candidate in self._author_nodes(*wanted)
            if candidate.message
        ]
        return "\n".join(texts)

    def wordcloud(
        self,
        *authors: AuthorRole,
        **kwargs: Unpack[WordCloudKwargs],
    ) -> Image:
        """Word cloud image of the text written by the given roles.

        Defaults to the ``"user"`` role; ``kwargs`` go to ``generate_wordcloud``.
        """
        wanted = authors or ("user",)
        return generate_wordcloud(self.plaintext(*wanted), **kwargs)

    @property
    def week_start(self) -> datetime:
        """Midnight on the Monday of the week the conversation was created in."""
        created = self.create_time
        monday = created - timedelta(days=created.weekday())

        return monday.replace(hour=0, minute=0, second=0, microsecond=0)

    @property
    def month_start(self) -> datetime:
        """Midnight on the first day of the month the conversation was created in."""
        midnight = {"hour": 0, "minute": 0, "second": 0, "microsecond": 0}
        return self.create_time.replace(day=1, **midnight)

    @property
    def year_start(self) -> datetime:
        """Midnight on January 1st of the year the conversation was created in."""
        midnight = {"hour": 0, "minute": 0, "second": 0, "microsecond": 0}
        return self.create_time.replace(month=1, day=1, **midnight)