PyPI - convoviz - Versions diffs - 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

convoviz 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

convoviz/__init__.py +25 -5
convoviz/__main__.py +6 -5
convoviz/analysis/__init__.py +9 -0
convoviz/analysis/graphs.py +98 -0
convoviz/analysis/wordcloud.py +142 -0
convoviz/assets/colormaps.txt +15 -16
convoviz/cli.py +101 -94
convoviz/config.py +88 -0
convoviz/exceptions.py +47 -0
convoviz/interactive.py +178 -0
convoviz/io/__init__.py +21 -0
convoviz/io/loaders.py +135 -0
convoviz/io/writers.py +96 -0
convoviz/models/__init__.py +26 -6
convoviz/models/collection.py +107 -0
convoviz/models/conversation.py +149 -0
convoviz/models/message.py +77 -0
convoviz/models/node.py +66 -0
convoviz/pipeline.py +120 -0
convoviz/renderers/__init__.py +10 -0
convoviz/renderers/markdown.py +182 -0
convoviz/renderers/yaml.py +42 -0
convoviz/utils.py +68 -237
{convoviz-0.1.6.dist-info → convoviz-0.2.0.dist-info}/METADATA +61 -42
{convoviz-0.1.6.dist-info → convoviz-0.2.0.dist-info}/RECORD +27 -17
convoviz-0.2.0.dist-info/WHEEL +4 -0
convoviz-0.2.0.dist-info/entry_points.txt +3 -0
convoviz/configuration.py +0 -125
convoviz/data_analysis.py +0 -118
convoviz/long_runs.py +0 -91
convoviz/models/_conversation.py +0 -288
convoviz/models/_conversation_set.py +0 -190
convoviz/models/_message.py +0 -89
convoviz/models/_node.py +0 -74
convoviz-0.1.6.dist-info/LICENSE +0 -21
convoviz-0.1.6.dist-info/WHEEL +0 -4

convoviz/configuration.py DELETED Viewed

@@ -1,125 +0,0 @@
-"""Module for handling user configuration and updating the models."""
-from __future__ import annotations
-from questionary import (
-    Choice,
-    Style,
-    checkbox,
-    select,
-)
-from questionary import (
-    path as qst_path,
-)
-from questionary import (
-    text as qst_text,
-)
-from .models import Conversation, Message
-from .utils import (
-    DEFAULT_USER_CONFIGS,
-    colormaps,
-    font_names,
-    font_path,
-    stem,
-    validate_header,
-    validate_zip,
-)
-CUSTOM_STYLE = Style(
-    [
-        ("qmark", "fg:#34eb9b bold"),
-        ("question", "bold fg:#e0e0e0"),
-        ("answer", "fg:#34ebeb bold"),
-        ("pointer", "fg:#e834eb bold"),
-        ("highlighted", "fg:#349ceb bold"),
-        ("selected", "fg:#34ebeb"),
-        ("separator", "fg:#eb3434"),
-        ("instruction", "fg:#eb9434"),
-        ("text", "fg:#b2eb34"),
-        ("disabled", "fg:#858585 italic"),
-    ],
-)
-class UserConfigs:
-    """Class for handling user configuration."""
-    def __init__(self) -> None:
-        """Initialize UserConfigs object."""
-        self.configs = DEFAULT_USER_CONFIGS.copy()
-        # will implement a way to read from a config file later ...
-    def prompt(self) -> None:
-        """Prompt the user for input and update the configs."""
-        lookup = self.configs
-        lookup["zip_filepath"] = qst_path(
-            "Enter the path to the zip file :",
-            lookup["zip_filepath"],
-            validate=validate_zip,
-            style=CUSTOM_STYLE,
-        ).ask()
-        lookup["output_folder"] = qst_path(
-            "Enter the path to the output folder :",
-            lookup["output_folder"],
-            style=CUSTOM_STYLE,
-        ).ask()
-        for author_role in lookup["message"]["author_headers"]:
-            lookup["message"]["author_headers"][author_role] = qst_text(
-                f"Enter the message header (#) for messages from '{author_role}' :",
-                lookup["message"]["author_headers"][author_role],
-                validate=validate_header,
-                style=CUSTOM_STYLE,
-            ).ask()
-        lookup["conversation"]["markdown"]["latex_delimiters"] = select(
-            "Select the LaTeX math delimiters you want to use :",
-            ["default", "dollars"],
-            lookup["conversation"]["markdown"]["latex_delimiters"],
-            style=CUSTOM_STYLE,
-        ).ask()
-        yaml_choices = [
-            Choice(title=header, checked=value)
-            for header, value in lookup["conversation"]["yaml"].items()
-        ]
-        selected_headers = checkbox(
-            "Select the YAML metadata headers you want to include :",
-            yaml_choices,
-            style=CUSTOM_STYLE,
-        ).ask()
-        for header in lookup["conversation"]["yaml"]:
-            lookup["conversation"]["yaml"][header] = header in selected_headers
-        font_name: str = select(
-            "Select the font you want to use for the word clouds :",
-            font_names(),
-            stem(lookup["wordcloud"].get("font_path") or ""),
-            style=CUSTOM_STYLE,
-        ).ask()
-        lookup["wordcloud"]["font_path"] = str(font_path(font_name))
-        lookup["wordcloud"]["colormap"] = select(
-            "Select the color theme you want to use for the word clouds :",
-            colormaps(),
-            lookup["wordcloud"].get("colormap"),
-            style=CUSTOM_STYLE,
-        ).ask()
-        lookup["wordcloud"]["custom_stopwords"] = qst_text(
-            "Enter custom stopwords (separated by commas) :",
-            lookup["wordcloud"].get("custom_stopwords", ""),
-            style=CUSTOM_STYLE,
-        ).ask()
-    def set_model_configs(self) -> None:
-        """Set the configuration for all models."""
-        Message.update_configs(self.configs["message"])
-        Conversation.update_configs(self.configs["conversation"])

convoviz/data_analysis.py DELETED Viewed

@@ -1,118 +0,0 @@
-"""Module for all the data visualizations.
-Should ideally only return matplotlib objects, and not deal with the filesystem.
-"""
-# pyright: reportUnknownMemberType = false
-from __future__ import annotations
-from collections import defaultdict
-from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Unpack
-from matplotlib.figure import Figure
-from nltk import download as nltk_download  # type: ignore[import-untyped]
-from nltk.corpus import stopwords as nltk_stopwords  # type: ignore[import-untyped]
-from nltk.data import find as nltk_find  # type: ignore[import-untyped]
-from wordcloud import WordCloud  # type: ignore[import-untyped]
-from .utils import DEFAULT_WORDCLOUD_CONFIGS
-if TYPE_CHECKING:
-    from PIL.Image import Image
-    from .utils import GraphKwargs, WordCloudKwargs
-def generate_week_barplot(
-    timestamps: list[float],
-    title: str,
-    **kwargs: Unpack[GraphKwargs],
-) -> Figure:
-    """Create a bar graph from the given timestamps, collapsed on one week."""
-    dates = [datetime.fromtimestamp(ts, timezone.utc) for ts in timestamps]
-    weekday_counts: defaultdict[str, int] = defaultdict(int)
-    days = [
-        "Monday",
-        "Tuesday",
-        "Wednesday",
-        "Thursday",
-        "Friday",
-        "Saturday",
-        "Sunday",
-    ]
-    for date in dates:
-        weekday_counts[days[date.weekday()]] += 1
-    x = days
-    y = [weekday_counts[day] for day in days]
-    fig = Figure(dpi=300)
-    ax = fig.add_subplot()
-    ax.bar(x, y)
-    ax.set_xlabel("Weekday")
-    ax.set_ylabel("Prompt Count")
-    ax.set_title(title)
-    ax.set_xticks(x)
-    ax.set_xticklabels(x, rotation=45)
-    fig.tight_layout()
-    return fig
-# Ensure that the stopwords are downloaded
-def _load_nltk_stopwords() -> set[str]:
-    """Load nltk stopwords."""
-    try:
-        nltk_find("corpora/stopwords")
-    except LookupError:
-        nltk_download("stopwords")
-    languages = [
-        "arabic",
-        "english",
-        "french",
-        "german",
-        "spanish",
-        "portuguese",
-    ]  # add more languages here ...
-    return {word for lang in languages for word in nltk_stopwords.words(fileids=lang)}
-def generate_wordcloud(
-    text: str,
-    **kwargs: Unpack[WordCloudKwargs],
-) -> Image:
-    """Create a wordcloud from the given text."""
-    configs = DEFAULT_WORDCLOUD_CONFIGS.copy()
-    configs.update(kwargs)
-    nltk_stopwords = _load_nltk_stopwords()
-    custom_stopwords = configs.get("custom_stopwords")
-    custom_stopwords_list = custom_stopwords.split(sep=",") if custom_stopwords else []
-    custom_stopwords_list = [
-        word.strip().lower() for word in custom_stopwords_list if word.strip()
-    ]
-    stopwords = nltk_stopwords.union(set(custom_stopwords_list))
-    wordcloud = WordCloud(
-        font_path=configs.get("font_path"),
-        width=configs.get("width"),  # pyright: ignore[reportGeneralTypeIssues]
-        height=configs.get("height"),  # pyright: ignore[reportGeneralTypeIssues]
-        stopwords=stopwords,  # pyright: ignore[reportGeneralTypeIssues]
-        background_color=configs.get("background_color"),  # pyright: ignore[reportGeneralTypeIssues]
-        mode=configs.get("mode"),  # pyright: ignore[reportGeneralTypeIssues]
-        colormap=configs.get("colormap"),
-        include_numbers=configs.get("include_numbers"),  # pyright: ignore[reportGeneralTypeIssues]
-    ).generate(text)
-    return wordcloud.to_image()

convoviz/long_runs.py DELETED Viewed

@@ -1,91 +0,0 @@
-"""Module for various processes that are used in the controllers."""
-from __future__ import annotations
-from pathlib import Path
-from typing import TYPE_CHECKING, Unpack
-from tqdm import tqdm
-if TYPE_CHECKING:
-    from .models import ConversationSet
-    from .utils import GraphKwargs, WordCloudKwargs
-def generate_week_barplots(
-    conv_set: ConversationSet,
-    dir_path: Path | str,
-    *,
-    progress_bar: bool = False,
-    **kwargs: Unpack[GraphKwargs],
-) -> None:
-    """Create the weekwise graphs and save them to the folder."""
-    dir_path = Path(dir_path)
-    month_groups = conv_set.group_by_month()
-    year_groups = conv_set.group_by_year()
-    for month in tqdm(
-        month_groups.keys(),
-        "Creating monthly weekwise graphs 📈 ",
-        disable=not progress_bar,
-    ):
-        title = month.strftime("%B '%y")
-        month_groups[month].week_barplot(title, **kwargs).savefig(  # pyright: ignore [reportUnknownMemberType]
-            dir_path / f"{month.strftime('%Y %B')}.png",
-        )
-    for year in tqdm(
-        year_groups.keys(),
-        "Creating yearly weekwise graphs 📈 ",
-        disable=not progress_bar,
-    ):
-        title = year.strftime("%Y")
-        year_groups[year].week_barplot(title, **kwargs).savefig(  # pyright: ignore [reportUnknownMemberType]
-            dir_path / f"{year.strftime('%Y')}.png",
-        )
-def generate_wordclouds(
-    conv_set: ConversationSet,
-    dir_path: Path | str,
-    *,
-    progress_bar: bool = False,
-    **kwargs: Unpack[WordCloudKwargs],
-) -> None:
-    """Create the wordclouds and save them to the folder."""
-    dir_path = Path(dir_path)
-    week_groups = conv_set.group_by_week()
-    month_groups = conv_set.group_by_month()
-    year_groups = conv_set.group_by_year()
-    for week in tqdm(
-        week_groups.keys(),
-        "Creating weekly wordclouds 🔡☁️ ",
-        disable=not progress_bar,
-    ):
-        week_groups[week].wordcloud(**kwargs).save(
-            dir_path / f"{week.strftime('%Y week %W')}.png",
-            optimize=True,
-        )
-    for month in tqdm(
-        month_groups.keys(),
-        "Creating monthly wordclouds 🔡☁️ ",
-        disable=not progress_bar,
-    ):
-        month_groups[month].wordcloud(**kwargs).save(
-            dir_path / f"{month.strftime('%Y %B')}.png",
-            optimize=True,
-        )
-    for year in tqdm(
-        year_groups.keys(),
-        "Creating yearly wordclouds 🔡☁️ ",
-        disable=not progress_bar,
-    ):
-        year_groups[year].wordcloud(**kwargs).save(
-            dir_path / f"{year.strftime('%Y')}.png",
-            optimize=True,
-        )

convoviz/models/_conversation.py DELETED Viewed

@@ -1,288 +0,0 @@
-"""Conversation model. Represents a single ChatGPT chat.
-object path : conversations.json -> conversation (one of the list items)
-"""
-from __future__ import annotations
-from datetime import datetime, timedelta
-from os import utime as os_utime
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, ClassVar, Unpack
-from orjson import loads
-from pydantic import BaseModel
-from convoviz.data_analysis import generate_wordcloud
-from convoviz.utils import (
-    DEFAULT_CONVERSATION_CONFIGS,
-    ConversationConfigs,
-    WordCloudKwargs,
-    close_code_blocks,
-    replace_latex_delimiters,
-    sanitize,
-)
-from ._node import Node
-if TYPE_CHECKING:
-    from PIL.Image import Image
-    from ._message import AuthorRole
-class Conversation(BaseModel):
-    """Wrapper class for a `conversation` in _a_ `json` file."""
-    __configs: ClassVar[ConversationConfigs] = DEFAULT_CONVERSATION_CONFIGS
-    title: str
-    create_time: datetime
-    update_time: datetime
-    mapping: dict[str, Node]
-    moderation_results: list[Any]
-    current_node: str
-    plugin_ids: list[str] | None = None
-    conversation_id: str
-    conversation_template_id: str | None = None
-    id: str | None = None  # noqa: A003
-    @classmethod
-    def update_configs(cls, configs: ConversationConfigs) -> None:
-        """Set the configuration for all conversations."""
-        cls.__configs.update(configs)
-    @classmethod
-    def from_json(cls, filepath: Path | str) -> Conversation:
-        """Load the conversation from a JSON file."""
-        filepath = Path(filepath)
-        with filepath.open(encoding="utf-8") as file:
-            return cls(**loads(file.read()))
-    @property
-    def node_mapping(self) -> dict[str, Node]:
-        """Return a dictionary of connected Node objects, based on the mapping."""
-        return Node.mapping(self.mapping)
-    @property
-    def _all_message_nodes(self) -> list[Node]:
-        """List of all nodes that have a message, including all branches."""
-        return [node for node in self.node_mapping.values() if node.message]
-    def _author_nodes(
-        self,
-        *authors: AuthorRole,
-    ) -> list[Node]:
-        """List of all nodes with the given author role (all branches)."""
-        if len(authors) == 0:
-            authors = ("user",)
-        return [
-            node
-            for node in self._all_message_nodes
-            if node.message and node.message.author.role in authors
-        ]
-    @property
-    def leaf_count(self) -> int:
-        """Return the number of leaves in the conversation."""
-        return sum(1 for node in self._all_message_nodes if not node.children_nodes)
-    @property
-    def url(self) -> str:
-        """Chat URL."""
-        return f"https://chat.openai.com/c/{self.conversation_id}"
-    @property
-    def content_types(self) -> list[str]:
-        """List of all content types in the conversation (all branches)."""
-        return list(
-            {
-                node.message.content.content_type
-                for node in self._all_message_nodes
-                if node.message
-            },
-        )
-    def message_count(
-        self,
-        *authors: AuthorRole,
-    ) -> int:
-        """Return the number of 'user' and 'assistant' messages (all branches)."""
-        if len(authors) == 0:
-            authors = ("user",)
-        return len(self._author_nodes(*authors))
-    @property
-    def model(self) -> str | None:
-        """ChatGPT model used for the conversation."""
-        assistant_nodes: list[Node] = self._author_nodes("assistant")
-        if not assistant_nodes:
-            return None
-        message = assistant_nodes[0].message
-        return message.metadata.model_slug if message else None
-    @property
-    def plugins(self) -> list[str]:
-        """List of all ChatGPT plugins used in the conversation."""
-        return list(
-            {
-                node.message.metadata.invoked_plugin["namespace"]
-                for node in self._author_nodes("tool")
-                if node.message and node.message.metadata.invoked_plugin
-            },
-        )
-    @property
-    def custom_instructions(self) -> dict[str, str]:
-        """Return custom instructions used for the conversation."""
-        system_nodes = self._author_nodes("system")
-        if len(system_nodes) < 2:
-            return {}
-        context_message = system_nodes[1].message
-        if context_message and context_message.metadata.is_user_system_message:
-            return context_message.metadata.user_context_message_data or {}
-        return {}
-        # TODO: check if this is the same for conversations from the bookmarklet
-    @property
-    def yaml(self) -> str:
-        """YAML metadata header for the conversation."""
-        yaml_config = self.__configs["yaml"]
-        yaml_map = {
-            "title": self.title,
-            "chat_link": self.url,
-            "create_time": self.create_time,
-            "update_time": self.update_time,
-            "model": self.model,
-            "used_plugins": self.plugins,
-            "message_count": self.message_count("user", "assistant"),
-            "content_types": self.content_types,
-            "custom_instructions": self.custom_instructions,
-        }
-        yaml = ""
-        for key, value in yaml_map.items():
-            if yaml_config.get(key, True):
-                yaml += f"{key}: {value}\n"
-        if not yaml:
-            return ""
-        return f"---\n{yaml}---\n"
-    @property
-    def markdown(self) -> str:
-        """Return the full markdown text content of the conversation."""
-        markdown_config = self.__configs["markdown"]
-        latex_delimiters = markdown_config["latex_delimiters"]
-        markdown = self.yaml
-        for node in self._all_message_nodes:
-            if node.message:
-                content = close_code_blocks(node.message.text)
-                # prevent empty messages from taking up white space
-                content = f"\n{content}\n" if content else ""
-                if latex_delimiters == "dollars":
-                    content = replace_latex_delimiters(content)
-                markdown += f"\n{node.header}{content}{node.footer}\n---\n"
-        return markdown
-    def save(self, filepath: Path | str) -> None:
-        """Save the conversation to the file, with added modification time."""
-        filepath = Path(filepath)
-        base_file_name = sanitize(filepath.stem)
-        counter = 0
-        while filepath.exists():
-            counter += 1
-            filepath = filepath.with_name(
-                f"{base_file_name} ({counter}){filepath.suffix}",
-            )
-        with filepath.open("w", encoding="utf-8") as file:
-            file.write(self.markdown)
-        os_utime(filepath, (self.update_time.timestamp(), self.update_time.timestamp()))
-    def timestamps(
-        self,
-        *authors: AuthorRole,
-    ) -> list[float]:
-        """List of all message timestamps from the given author role (all branches).
-        Useful for generating time graphs.
-        """
-        if len(authors) == 0:
-            authors = ("user",)
-        return [
-            node.message.create_time.timestamp()
-            for node in self._author_nodes(*authors)
-            if node.message and node.message.create_time
-        ]
-    def plaintext(
-        self,
-        *authors: AuthorRole,
-    ) -> str:
-        """Entire plain text from the given author role (all branches).
-        Useful for generating word clouds.
-        """
-        if len(authors) == 0:
-            authors = ("user",)
-        return "\n".join(
-            node.message.text for node in self._author_nodes(*authors) if node.message
-        )
-    def wordcloud(
-        self,
-        *authors: AuthorRole,
-        **kwargs: Unpack[WordCloudKwargs],
-    ) -> Image:
-        """Generate a wordcloud from the conversation."""
-        if len(authors) == 0:
-            authors = ("user",)
-        text = self.plaintext(*authors)
-        return generate_wordcloud(text, **kwargs)
-    @property
-    def week_start(self) -> datetime:
-        """Return the monday of the week the conversation was created in."""
-        start_of_week = self.create_time - timedelta(
-            days=self.create_time.weekday(),
-        )
-        return start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
-    @property
-    def month_start(self) -> datetime:
-        """Return the first of the month the conversation was created in."""
-        return self.create_time.replace(
-            day=1,
-            hour=0,
-            minute=0,
-            second=0,
-            microsecond=0,
-        )
-    @property
-    def year_start(self) -> datetime:
-        """Return the first of January of the year the conversation was created in."""
-        return self.create_time.replace(
-            month=1,
-            day=1,
-            hour=0,
-            minute=0,
-            second=0,
-            microsecond=0,
-        )

convoviz 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

convoviz 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl