convoviz 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
convoviz/configuration.py DELETED
@@ -1,125 +0,0 @@
1
- """Module for handling user configuration and updating the models."""
2
-
3
- from __future__ import annotations
4
-
5
- from questionary import (
6
- Choice,
7
- Style,
8
- checkbox,
9
- select,
10
- )
11
- from questionary import (
12
- path as qst_path,
13
- )
14
- from questionary import (
15
- text as qst_text,
16
- )
17
-
18
- from .models import Conversation, Message
19
- from .utils import (
20
- DEFAULT_USER_CONFIGS,
21
- colormaps,
22
- font_names,
23
- font_path,
24
- stem,
25
- validate_header,
26
- validate_zip,
27
- )
28
-
29
- CUSTOM_STYLE = Style(
30
- [
31
- ("qmark", "fg:#34eb9b bold"),
32
- ("question", "bold fg:#e0e0e0"),
33
- ("answer", "fg:#34ebeb bold"),
34
- ("pointer", "fg:#e834eb bold"),
35
- ("highlighted", "fg:#349ceb bold"),
36
- ("selected", "fg:#34ebeb"),
37
- ("separator", "fg:#eb3434"),
38
- ("instruction", "fg:#eb9434"),
39
- ("text", "fg:#b2eb34"),
40
- ("disabled", "fg:#858585 italic"),
41
- ],
42
- )
43
-
44
-
45
- class UserConfigs:
46
- """Class for handling user configuration."""
47
-
48
- def __init__(self) -> None:
49
- """Initialize UserConfigs object."""
50
- self.configs = DEFAULT_USER_CONFIGS.copy()
51
-
52
- # will implement a way to read from a config file later ...
53
-
54
- def prompt(self) -> None:
55
- """Prompt the user for input and update the configs."""
56
- lookup = self.configs
57
-
58
- lookup["zip_filepath"] = qst_path(
59
- "Enter the path to the zip file :",
60
- lookup["zip_filepath"],
61
- validate=validate_zip,
62
- style=CUSTOM_STYLE,
63
- ).ask()
64
-
65
- lookup["output_folder"] = qst_path(
66
- "Enter the path to the output folder :",
67
- lookup["output_folder"],
68
- style=CUSTOM_STYLE,
69
- ).ask()
70
-
71
- for author_role in lookup["message"]["author_headers"]:
72
- lookup["message"]["author_headers"][author_role] = qst_text(
73
- f"Enter the message header (#) for messages from '{author_role}' :",
74
- lookup["message"]["author_headers"][author_role],
75
- validate=validate_header,
76
- style=CUSTOM_STYLE,
77
- ).ask()
78
-
79
- lookup["conversation"]["markdown"]["latex_delimiters"] = select(
80
- "Select the LaTeX math delimiters you want to use :",
81
- ["default", "dollars"],
82
- lookup["conversation"]["markdown"]["latex_delimiters"],
83
- style=CUSTOM_STYLE,
84
- ).ask()
85
-
86
- yaml_choices = [
87
- Choice(title=header, checked=value)
88
- for header, value in lookup["conversation"]["yaml"].items()
89
- ]
90
-
91
- selected_headers = checkbox(
92
- "Select the YAML metadata headers you want to include :",
93
- yaml_choices,
94
- style=CUSTOM_STYLE,
95
- ).ask()
96
-
97
- for header in lookup["conversation"]["yaml"]:
98
- lookup["conversation"]["yaml"][header] = header in selected_headers
99
-
100
- font_name: str = select(
101
- "Select the font you want to use for the word clouds :",
102
- font_names(),
103
- stem(lookup["wordcloud"].get("font_path") or ""),
104
- style=CUSTOM_STYLE,
105
- ).ask()
106
-
107
- lookup["wordcloud"]["font_path"] = str(font_path(font_name))
108
-
109
- lookup["wordcloud"]["colormap"] = select(
110
- "Select the color theme you want to use for the word clouds :",
111
- colormaps(),
112
- lookup["wordcloud"].get("colormap"),
113
- style=CUSTOM_STYLE,
114
- ).ask()
115
-
116
- lookup["wordcloud"]["custom_stopwords"] = qst_text(
117
- "Enter custom stopwords (separated by commas) :",
118
- lookup["wordcloud"].get("custom_stopwords", ""),
119
- style=CUSTOM_STYLE,
120
- ).ask()
121
-
122
- def set_model_configs(self) -> None:
123
- """Set the configuration for all models."""
124
- Message.update_configs(self.configs["message"])
125
- Conversation.update_configs(self.configs["conversation"])
convoviz/data_analysis.py DELETED
@@ -1,118 +0,0 @@
1
- """Module for all the data visualizations.
2
-
3
- Should ideally only return matplotlib objects, and not deal with the filesystem.
4
- """
5
-
6
- # pyright: reportUnknownMemberType = false
7
-
8
- from __future__ import annotations
9
-
10
- from collections import defaultdict
11
- from datetime import datetime, timezone
12
- from typing import TYPE_CHECKING, Unpack
13
-
14
- from matplotlib.figure import Figure
15
- from nltk import download as nltk_download # type: ignore[import-untyped]
16
- from nltk.corpus import stopwords as nltk_stopwords # type: ignore[import-untyped]
17
- from nltk.data import find as nltk_find # type: ignore[import-untyped]
18
- from wordcloud import WordCloud # type: ignore[import-untyped]
19
-
20
- from .utils import DEFAULT_WORDCLOUD_CONFIGS
21
-
22
- if TYPE_CHECKING:
23
- from PIL.Image import Image
24
-
25
- from .utils import GraphKwargs, WordCloudKwargs
26
-
27
-
28
- def generate_week_barplot(
29
- timestamps: list[float],
30
- title: str,
31
- **kwargs: Unpack[GraphKwargs],
32
- ) -> Figure:
33
- """Create a bar graph from the given timestamps, collapsed on one week."""
34
- dates = [datetime.fromtimestamp(ts, timezone.utc) for ts in timestamps]
35
-
36
- weekday_counts: defaultdict[str, int] = defaultdict(int)
37
- days = [
38
- "Monday",
39
- "Tuesday",
40
- "Wednesday",
41
- "Thursday",
42
- "Friday",
43
- "Saturday",
44
- "Sunday",
45
- ]
46
-
47
- for date in dates:
48
- weekday_counts[days[date.weekday()]] += 1
49
-
50
- x = days
51
- y = [weekday_counts[day] for day in days]
52
-
53
- fig = Figure(dpi=300)
54
- ax = fig.add_subplot()
55
-
56
- ax.bar(x, y)
57
- ax.set_xlabel("Weekday")
58
- ax.set_ylabel("Prompt Count")
59
-
60
- ax.set_title(title)
61
-
62
- ax.set_xticks(x)
63
- ax.set_xticklabels(x, rotation=45)
64
- fig.tight_layout()
65
-
66
- return fig
67
-
68
-
69
- # Ensure that the stopwords are downloaded
70
- def _load_nltk_stopwords() -> set[str]:
71
- """Load nltk stopwords."""
72
- try:
73
- nltk_find("corpora/stopwords")
74
- except LookupError:
75
- nltk_download("stopwords")
76
-
77
- languages = [
78
- "arabic",
79
- "english",
80
- "french",
81
- "german",
82
- "spanish",
83
- "portuguese",
84
- ] # add more languages here ...
85
-
86
- return {word for lang in languages for word in nltk_stopwords.words(fileids=lang)}
87
-
88
-
89
- def generate_wordcloud(
90
- text: str,
91
- **kwargs: Unpack[WordCloudKwargs],
92
- ) -> Image:
93
- """Create a wordcloud from the given text."""
94
- configs = DEFAULT_WORDCLOUD_CONFIGS.copy()
95
- configs.update(kwargs)
96
-
97
- nltk_stopwords = _load_nltk_stopwords()
98
-
99
- custom_stopwords = configs.get("custom_stopwords")
100
- custom_stopwords_list = custom_stopwords.split(sep=",") if custom_stopwords else []
101
- custom_stopwords_list = [
102
- word.strip().lower() for word in custom_stopwords_list if word.strip()
103
- ]
104
-
105
- stopwords = nltk_stopwords.union(set(custom_stopwords_list))
106
-
107
- wordcloud = WordCloud(
108
- font_path=configs.get("font_path"),
109
- width=configs.get("width"), # pyright: ignore[reportGeneralTypeIssues]
110
- height=configs.get("height"), # pyright: ignore[reportGeneralTypeIssues]
111
- stopwords=stopwords, # pyright: ignore[reportGeneralTypeIssues]
112
- background_color=configs.get("background_color"), # pyright: ignore[reportGeneralTypeIssues]
113
- mode=configs.get("mode"), # pyright: ignore[reportGeneralTypeIssues]
114
- colormap=configs.get("colormap"),
115
- include_numbers=configs.get("include_numbers"), # pyright: ignore[reportGeneralTypeIssues]
116
- ).generate(text)
117
-
118
- return wordcloud.to_image()
convoviz/long_runs.py DELETED
@@ -1,91 +0,0 @@
1
- """Module for various processes that are used in the controllers."""
2
-
3
- from __future__ import annotations
4
-
5
- from pathlib import Path
6
- from typing import TYPE_CHECKING, Unpack
7
-
8
- from tqdm import tqdm
9
-
10
- if TYPE_CHECKING:
11
- from .models import ConversationSet
12
- from .utils import GraphKwargs, WordCloudKwargs
13
-
14
-
15
- def generate_week_barplots(
16
- conv_set: ConversationSet,
17
- dir_path: Path | str,
18
- *,
19
- progress_bar: bool = False,
20
- **kwargs: Unpack[GraphKwargs],
21
- ) -> None:
22
- """Create the weekwise graphs and save them to the folder."""
23
- dir_path = Path(dir_path)
24
-
25
- month_groups = conv_set.group_by_month()
26
- year_groups = conv_set.group_by_year()
27
-
28
- for month in tqdm(
29
- month_groups.keys(),
30
- "Creating monthly weekwise graphs 📈 ",
31
- disable=not progress_bar,
32
- ):
33
- title = month.strftime("%B '%y")
34
- month_groups[month].week_barplot(title, **kwargs).savefig( # pyright: ignore [reportUnknownMemberType]
35
- dir_path / f"{month.strftime('%Y %B')}.png",
36
- )
37
-
38
- for year in tqdm(
39
- year_groups.keys(),
40
- "Creating yearly weekwise graphs 📈 ",
41
- disable=not progress_bar,
42
- ):
43
- title = year.strftime("%Y")
44
- year_groups[year].week_barplot(title, **kwargs).savefig( # pyright: ignore [reportUnknownMemberType]
45
- dir_path / f"{year.strftime('%Y')}.png",
46
- )
47
-
48
-
49
- def generate_wordclouds(
50
- conv_set: ConversationSet,
51
- dir_path: Path | str,
52
- *,
53
- progress_bar: bool = False,
54
- **kwargs: Unpack[WordCloudKwargs],
55
- ) -> None:
56
- """Create the wordclouds and save them to the folder."""
57
- dir_path = Path(dir_path)
58
-
59
- week_groups = conv_set.group_by_week()
60
- month_groups = conv_set.group_by_month()
61
- year_groups = conv_set.group_by_year()
62
-
63
- for week in tqdm(
64
- week_groups.keys(),
65
- "Creating weekly wordclouds 🔡☁️ ",
66
- disable=not progress_bar,
67
- ):
68
- week_groups[week].wordcloud(**kwargs).save(
69
- dir_path / f"{week.strftime('%Y week %W')}.png",
70
- optimize=True,
71
- )
72
-
73
- for month in tqdm(
74
- month_groups.keys(),
75
- "Creating monthly wordclouds 🔡☁️ ",
76
- disable=not progress_bar,
77
- ):
78
- month_groups[month].wordcloud(**kwargs).save(
79
- dir_path / f"{month.strftime('%Y %B')}.png",
80
- optimize=True,
81
- )
82
-
83
- for year in tqdm(
84
- year_groups.keys(),
85
- "Creating yearly wordclouds 🔡☁️ ",
86
- disable=not progress_bar,
87
- ):
88
- year_groups[year].wordcloud(**kwargs).save(
89
- dir_path / f"{year.strftime('%Y')}.png",
90
- optimize=True,
91
- )
@@ -1,288 +0,0 @@
1
- """Conversation model. Represents a single ChatGPT chat.
2
-
3
- object path : conversations.json -> conversation (one of the list items)
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- from datetime import datetime, timedelta
9
- from os import utime as os_utime
10
- from pathlib import Path
11
- from typing import TYPE_CHECKING, Any, ClassVar, Unpack
12
-
13
- from orjson import loads
14
- from pydantic import BaseModel
15
-
16
- from convoviz.data_analysis import generate_wordcloud
17
- from convoviz.utils import (
18
- DEFAULT_CONVERSATION_CONFIGS,
19
- ConversationConfigs,
20
- WordCloudKwargs,
21
- close_code_blocks,
22
- replace_latex_delimiters,
23
- sanitize,
24
- )
25
-
26
- from ._node import Node
27
-
28
- if TYPE_CHECKING:
29
- from PIL.Image import Image
30
-
31
- from ._message import AuthorRole
32
-
33
-
34
- class Conversation(BaseModel):
35
- """Wrapper class for a `conversation` in _a_ `json` file."""
36
-
37
- __configs: ClassVar[ConversationConfigs] = DEFAULT_CONVERSATION_CONFIGS
38
-
39
- title: str
40
- create_time: datetime
41
- update_time: datetime
42
- mapping: dict[str, Node]
43
- moderation_results: list[Any]
44
- current_node: str
45
- plugin_ids: list[str] | None = None
46
- conversation_id: str
47
- conversation_template_id: str | None = None
48
- id: str | None = None # noqa: A003
49
-
50
- @classmethod
51
- def update_configs(cls, configs: ConversationConfigs) -> None:
52
- """Set the configuration for all conversations."""
53
- cls.__configs.update(configs)
54
-
55
- @classmethod
56
- def from_json(cls, filepath: Path | str) -> Conversation:
57
- """Load the conversation from a JSON file."""
58
- filepath = Path(filepath)
59
-
60
- with filepath.open(encoding="utf-8") as file:
61
- return cls(**loads(file.read()))
62
-
63
- @property
64
- def node_mapping(self) -> dict[str, Node]:
65
- """Return a dictionary of connected Node objects, based on the mapping."""
66
- return Node.mapping(self.mapping)
67
-
68
- @property
69
- def _all_message_nodes(self) -> list[Node]:
70
- """List of all nodes that have a message, including all branches."""
71
- return [node for node in self.node_mapping.values() if node.message]
72
-
73
- def _author_nodes(
74
- self,
75
- *authors: AuthorRole,
76
- ) -> list[Node]:
77
- """List of all nodes with the given author role (all branches)."""
78
- if len(authors) == 0:
79
- authors = ("user",)
80
- return [
81
- node
82
- for node in self._all_message_nodes
83
- if node.message and node.message.author.role in authors
84
- ]
85
-
86
- @property
87
- def leaf_count(self) -> int:
88
- """Return the number of leaves in the conversation."""
89
- return sum(1 for node in self._all_message_nodes if not node.children_nodes)
90
-
91
- @property
92
- def url(self) -> str:
93
- """Chat URL."""
94
- return f"https://chat.openai.com/c/{self.conversation_id}"
95
-
96
- @property
97
- def content_types(self) -> list[str]:
98
- """List of all content types in the conversation (all branches)."""
99
- return list(
100
- {
101
- node.message.content.content_type
102
- for node in self._all_message_nodes
103
- if node.message
104
- },
105
- )
106
-
107
- def message_count(
108
- self,
109
- *authors: AuthorRole,
110
- ) -> int:
111
- """Return the number of 'user' and 'assistant' messages (all branches)."""
112
- if len(authors) == 0:
113
- authors = ("user",)
114
- return len(self._author_nodes(*authors))
115
-
116
- @property
117
- def model(self) -> str | None:
118
- """ChatGPT model used for the conversation."""
119
- assistant_nodes: list[Node] = self._author_nodes("assistant")
120
- if not assistant_nodes:
121
- return None
122
-
123
- message = assistant_nodes[0].message
124
-
125
- return message.metadata.model_slug if message else None
126
-
127
- @property
128
- def plugins(self) -> list[str]:
129
- """List of all ChatGPT plugins used in the conversation."""
130
- return list(
131
- {
132
- node.message.metadata.invoked_plugin["namespace"]
133
- for node in self._author_nodes("tool")
134
- if node.message and node.message.metadata.invoked_plugin
135
- },
136
- )
137
-
138
- @property
139
- def custom_instructions(self) -> dict[str, str]:
140
- """Return custom instructions used for the conversation."""
141
- system_nodes = self._author_nodes("system")
142
- if len(system_nodes) < 2:
143
- return {}
144
-
145
- context_message = system_nodes[1].message
146
- if context_message and context_message.metadata.is_user_system_message:
147
- return context_message.metadata.user_context_message_data or {}
148
-
149
- return {}
150
-
151
- # TODO: check if this is the same for conversations from the bookmarklet
152
-
153
- @property
154
- def yaml(self) -> str:
155
- """YAML metadata header for the conversation."""
156
- yaml_config = self.__configs["yaml"]
157
-
158
- yaml_map = {
159
- "title": self.title,
160
- "chat_link": self.url,
161
- "create_time": self.create_time,
162
- "update_time": self.update_time,
163
- "model": self.model,
164
- "used_plugins": self.plugins,
165
- "message_count": self.message_count("user", "assistant"),
166
- "content_types": self.content_types,
167
- "custom_instructions": self.custom_instructions,
168
- }
169
-
170
- yaml = ""
171
-
172
- for key, value in yaml_map.items():
173
- if yaml_config.get(key, True):
174
- yaml += f"{key}: {value}\n"
175
-
176
- if not yaml:
177
- return ""
178
-
179
- return f"---\n{yaml}---\n"
180
-
181
- @property
182
- def markdown(self) -> str:
183
- """Return the full markdown text content of the conversation."""
184
- markdown_config = self.__configs["markdown"]
185
- latex_delimiters = markdown_config["latex_delimiters"]
186
-
187
- markdown = self.yaml
188
-
189
- for node in self._all_message_nodes:
190
- if node.message:
191
- content = close_code_blocks(node.message.text)
192
- # prevent empty messages from taking up white space
193
- content = f"\n{content}\n" if content else ""
194
- if latex_delimiters == "dollars":
195
- content = replace_latex_delimiters(content)
196
- markdown += f"\n{node.header}{content}{node.footer}\n---\n"
197
-
198
- return markdown
199
-
200
- def save(self, filepath: Path | str) -> None:
201
- """Save the conversation to the file, with added modification time."""
202
- filepath = Path(filepath)
203
- base_file_name = sanitize(filepath.stem)
204
-
205
- counter = 0
206
- while filepath.exists():
207
- counter += 1
208
- filepath = filepath.with_name(
209
- f"{base_file_name} ({counter}){filepath.suffix}",
210
- )
211
-
212
- with filepath.open("w", encoding="utf-8") as file:
213
- file.write(self.markdown)
214
-
215
- os_utime(filepath, (self.update_time.timestamp(), self.update_time.timestamp()))
216
-
217
- def timestamps(
218
- self,
219
- *authors: AuthorRole,
220
- ) -> list[float]:
221
- """List of all message timestamps from the given author role (all branches).
222
-
223
- Useful for generating time graphs.
224
- """
225
- if len(authors) == 0:
226
- authors = ("user",)
227
- return [
228
- node.message.create_time.timestamp()
229
- for node in self._author_nodes(*authors)
230
- if node.message and node.message.create_time
231
- ]
232
-
233
- def plaintext(
234
- self,
235
- *authors: AuthorRole,
236
- ) -> str:
237
- """Entire plain text from the given author role (all branches).
238
-
239
- Useful for generating word clouds.
240
- """
241
- if len(authors) == 0:
242
- authors = ("user",)
243
- return "\n".join(
244
- node.message.text for node in self._author_nodes(*authors) if node.message
245
- )
246
-
247
- def wordcloud(
248
- self,
249
- *authors: AuthorRole,
250
- **kwargs: Unpack[WordCloudKwargs],
251
- ) -> Image:
252
- """Generate a wordcloud from the conversation."""
253
- if len(authors) == 0:
254
- authors = ("user",)
255
- text = self.plaintext(*authors)
256
- return generate_wordcloud(text, **kwargs)
257
-
258
- @property
259
- def week_start(self) -> datetime:
260
- """Return the monday of the week the conversation was created in."""
261
- start_of_week = self.create_time - timedelta(
262
- days=self.create_time.weekday(),
263
- )
264
-
265
- return start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
266
-
267
- @property
268
- def month_start(self) -> datetime:
269
- """Return the first of the month the conversation was created in."""
270
- return self.create_time.replace(
271
- day=1,
272
- hour=0,
273
- minute=0,
274
- second=0,
275
- microsecond=0,
276
- )
277
-
278
- @property
279
- def year_start(self) -> datetime:
280
- """Return the first of January of the year the conversation was created in."""
281
- return self.create_time.replace(
282
- month=1,
283
- day=1,
284
- hour=0,
285
- minute=0,
286
- second=0,
287
- microsecond=0,
288
- )