sdialog 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdialog-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Idiap Research Institute
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
sdialog-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: sdialog
3
+ Version: 0.0.1
4
+ Summary: Synthetic Dialogue Generation and Analysis
5
+ Author-email: Sergio Burdisso <sergio.burdisso@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/idiap/sdialog
8
+ Project-URL: Issues, https://github.com/idiap/sdialog/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: print-color
15
+ Requires-Dist: langchain
16
+ Requires-Dist: langchain-ollama
17
+ Requires-Dist: tqdm
18
+ Requires-Dist: plotly
19
+ Requires-Dist: sentence-transformers
20
+ Requires-Dist: pandas
21
+ Requires-Dist: tenacity
22
+ Requires-Dist: numpy
23
+ Requires-Dist: flake8
24
+ Requires-Dist: pytest
25
+ Requires-Dist: ollama
26
+ Dynamic: license-file
27
+
28
+ # SDialog
29
+
30
+ Synthetic Dialogue Generation and Analysis
31
+
32
+ _(Coming soon)_
33
+
34
+ This package requires `Ollama` running on your system.
@@ -0,0 +1,7 @@
1
+ # SDialog
2
+
3
+ Synthetic Dialogue Generation and Analysis
4
+
5
+ _(Coming soon)_
6
+
7
+ This package requires `Ollama` running on your system.
@@ -0,0 +1,27 @@
1
+ [build-system]
2
+ requires = ["setuptools >= 77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sdialog"
7
+ version = "0.0.1"
8
+ authors = [
9
+ { name="Sergio Burdisso", email="sergio.burdisso@gmail.com" },
10
+ ]
11
+ description = "Synthetic Dialogue Generation and Analysis"
12
+ readme = "README.md"
13
+ requires-python = ">=3.9"
14
+ dynamic = ["dependencies"]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+ license = "MIT"
20
+ license-files = ["LICEN[CS]E*"]
21
+
22
+ [tool.setuptools.dynamic]
23
+ dependencies = {file = ["requirements.txt"]}
24
+
25
+ [project.urls]
26
+ Homepage = "https://github.com/idiap/sdialog"
27
+ Issues = "https://github.com/idiap/sdialog/issues"
@@ -0,0 +1,12 @@
1
+ print-color
2
+ langchain
3
+ langchain-ollama
4
+ tqdm
5
+ plotly
6
+ sentence-transformers
7
+ pandas
8
+ tenacity
9
+ numpy
10
+ flake8
11
+ pytest
12
+ ollama
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,131 @@
1
+ import json
2
+
3
+ from pydantic import BaseModel
4
+ from typing import List, Union, Optional
5
+ from print_color import print
6
+
7
+ from .util import make_serializable
8
+
9
+
10
class Turn(BaseModel):
    """A single dialogue turn: one utterance attributed to a speaker."""

    # Default added for consistency with Event.agent (also Optional with None default);
    # callers that always pass a speaker are unaffected.
    speaker: Optional[str] = None  # name of the speaker (None when unknown)
    text: str  # the utterance text
13
+
14
+
15
class Event(BaseModel):
    """A low-level dialogue event, e.g. an utterance or an instruction."""

    # Originating agent, e.g. "user" or "system".
    agent: Optional[str] = None
    # Action performed, e.g. "utter" or "instruct".
    action: str
    # Optional label refining the action (e.g. the type of instruction).
    actionLabel: Optional[str] = None
    # Content of the event.
    text: str
    # Unix timestamp of when the event occurred.
    timestamp: int
21
+
22
+
23
class Dialog(BaseModel):
    """A complete dialogue: metadata plus the sequence of turns (and, optionally, the raw events)."""

    formatVersion: Optional[str] = "0.0.5"  # version of this serialization format
    model: Optional[str] = None  # the model used to generate the dialogue
    seed: Optional[int] = None  # the seed used for generation
    dialogId: Optional[int] = None  # identifier of the dialogue
    complete: Optional[bool] = None  # whether the dialogue reached a natural end
    scenario: Optional[Union[dict, str]] = None  # the scenario used to generate the dialogue
    turns: List[Turn]  # the list of turns of the conversation
    events: Optional[List[Event]] = None  # raw events behind the turns, if available

    def __len__(self):
        """Number of turns in the dialogue."""
        return len(self.turns)

    def description(self, turn_template: str = "{speaker}: {text}"):
        """Render the dialogue as plain text, one turn per line.

        :param turn_template: format template with ``speaker`` and ``text`` placeholders.
        """
        return "\n".join(turn_template.format(speaker=turn.speaker, text=turn.text.replace("\n", " "))
                         for turn in self.turns)

    def json(self, string: bool = False, indent: Optional[int] = None):
        """Return the dialogue as a JSON-serializable dict, or as a JSON string if `string` is True."""
        data = self.model_dump()
        make_serializable(data)
        return json.dumps(data, indent=indent) if string else data

    def print(self, *a, **kw):
        """Pretty-print the dialogue to the console (see `print_dialog`)."""
        print_dialog(self, *a, **kw)

    def to_file(self, path: str, type: str = "auto"):
        """Save the dialogue to `path`.

        :param type: "txt", "json", or "auto" to infer the type from the file extension.
        """
        if type == "auto":
            type = "json" if path.endswith(".json") else "txt"

        with open(path, "w") as writer:
            writer.write(self.json(string=True) if type == "json" else self.description())

    @staticmethod
    def from_file(path: str, type: str = "auto"):
        """Load a dialogue from `path`.

        :param type: "txt", "json", or "auto" to infer the type from the file extension.
        """
        if type == "auto":
            type = "json" if path.endswith(".json") else "txt"

        with open(path) as reader:
            if type == "json":
                return Dialog.model_validate(json.load(reader))

            # Plain-text format: one "Speaker: utterance" per non-empty line.
            # `partition` (instead of `index`) keeps malformed colon-less lines
            # from raising ValueError: such a line becomes speaker-only text.
            turns = []
            for line in reader.read().split("\n"):
                if not line:
                    continue
                speaker, _, text = line.partition(":")
                turns.append(Turn(speaker=speaker.strip(), text=text.strip()))
            return Dialog(turns=turns)

    # TODO: add from_dict as an alias (so we don't have to use .model_validate())

    __str__ = description
76
+
77
+
78
class Instruction(BaseModel):
    """An instruction for an agent, optionally carrying extra events to record."""

    # Was `text: str = None`: the annotation must be Optional to match the None default.
    text: Optional[str] = None  # the instruction text
    events: Optional[Union[Event, List[Event]]] = None  # extra events
81
+
82
+
83
def print_dialog(dialog: Union[Dialog, dict], scenario: bool = False, orchestration: bool = False):
    """Pretty-print a dialogue to the console with per-speaker colors.

    :param dialog: the dialogue (or its dict representation) to print.
    :param scenario: if True, also print the scenario used to generate the dialogue.
    :param orchestration: if True, rebuild the turns from the raw events so
                          orchestrator instructions are shown too.
    """
    if isinstance(dialog, dict):
        dialog = Dialog.model_validate(dialog)

    speaker_tag_colors = ["red", "blue", "yellow", "cyan", "green", "magenta", "purple"]
    speaker_utt_colors = ["grey", "white"]

    # Metadata header (only the fields that are set).
    if dialog.dialogId:
        print(dialog.dialogId, tag="dialog_id", tag_color="purple", color="magenta", format="bold")
    if dialog.complete:
        print(dialog.complete, tag="complete", tag_color="purple", color="magenta", format="bold")
    if dialog.model:
        print(dialog.model, tag="model", tag_color="purple", color="magenta", format="bold")
    if dialog.seed:
        print(dialog.seed, tag="seed", tag_color="purple", color="magenta", format="bold")
    if scenario and dialog.scenario:
        print("", tag="scenario", tag_color="purple", color="magenta", format="bold")
        if isinstance(dialog.scenario, str):
            print(dialog.scenario, color="magenta")
        else:
            print(json.dumps(dialog.scenario, indent=2), color="magenta")

    print("--- Dialogue Begins ---", color="magenta", format="bold")
    speakers = sorted(set(turn.speaker for turn in dialog.turns))
    if orchestration:
        # Work on a copy so the caller's Dialog is not mutated.
        dialog = dialog.model_copy()
        dialog.turns = [Turn(speaker=e.agent, text=e.text) if e.action == "utter"
                        else (
                            Turn(speaker=e.agent, text=f"[pick_suggestion] {e.text}") if e.action == "pick_suggestion"
                            else
                            Turn(speaker=e.action, text=f"({e.agent}) {e.text}"))
                        for e in dialog.events]

    for turn in dialog.turns:
        speaker = turn.speaker

        if speaker not in speakers:
            # Non-speaker "turns" (e.g. orchestrator events) get a fixed distinctive color.
            tag_color = "yellow"
            color = "purple"
        else:
            tag_color = speaker_tag_colors[speakers.index(speaker) % len(speaker_tag_colors)]
            color = speaker_utt_colors[speakers.index(speaker) % len(speaker_utt_colors)]

        print(turn.text,
              tag=speaker,
              tag_color=tag_color,
              color=color)
    print("--- Dialogue Ends ---", color="magenta", format="bold")
@@ -0,0 +1,262 @@
1
+ import os
2
+ import re
3
+ import json
4
+
5
+ from tqdm.auto import tqdm
6
+
7
+ from . import Dialog, Turn, Event
8
+ from .personas import Persona, PersonaAgent
9
+ from .orchestrators import InstructionListOrchestrator, SimpleResponseOrchestrator
10
+
11
class STAR:
    """Helpers to read dialogues, scenarios, and task graphs from the STAR dataset.

    Call `STAR.set_path(...)` first to point at the dataset root folder.
    """

    _path = None  # root folder of the STAR dataset
    _speakers = ["User", "Wizard"]  # the two human-visible speakers in raw STAR dialogues

    @staticmethod
    def set_path(path):
        """Set the root folder of the STAR dataset."""
        STAR._path = path

    @staticmethod
    def read_graph(task_name, as_dot: bool = True):
        """Return the task's action-transition graph, as a DOT string or as the raw dict."""
        with open(os.path.join(STAR._path, f"tasks/{task_name}/{task_name}.json")) as reader:
            if not as_dot:
                return json.load(reader)["graph"]
            dot_edges = ";\n".join(f" {a} -> {b}" for a, b in json.load(reader)["graph"].items())

        return "digraph %s {\n%s\n}" % (task_name, dot_edges)

    @staticmethod
    def read_graph_responses(task_name, as_dict: bool = False):
        """Return example responses for each node of the task graph.

        Placeholder slots like "{name:type}" are rewritten as bare UPPERCASE
        placeholders, and the catch-all "out_of_scope" response is dropped.

        :param as_dict: if True return the dict, otherwise a pretty JSON string.
        """
        with open(os.path.join(STAR._path, f"tasks/{task_name}/responses.json")) as reader:
            responses = json.load(reader)
            responses = {key: re.sub(r"{(.+?)(?::\w+?)?}", lambda m: m.group(1).upper(), value)
                         for key, value in responses.items()
                         if key != "out_of_scope"}
        return responses if as_dict else json.dumps(responses, indent=2)

    @staticmethod
    def get_dialog(id):
        """Load dialogue `id` as a `Dialog`, mapping the "Wizard" agent to "System"."""
        with open(os.path.join(STAR._path, f"dialogues/{id}.json")) as reader:
            dialog = json.load(reader)

        for e in dialog["Events"]:
            if e["Agent"] == "Wizard":
                e["Agent"] = "System"

        return Dialog(
            dialogId=id,
            scenario=dialog["Scenario"],
            turns=[Turn(speaker=e["Agent"], text=e["Text"])
                   for e in dialog["Events"]
                   if e["Action"] in ["utter", "pick_suggestion"]],
            events=[Event(agent=e["Agent"],
                          action=e["Action"],
                          actionLabel=e.get("ActionLabel"),
                          text=e["Text"],
                          timestamp=e["UnixTime"])
                    for e in dialog["Events"]
                    if "Text" in e]
        )

    @staticmethod
    def get_dialogs(domain: str = None, task_name: str = None, happy: bool = None, multitask: bool = None):
        """Return all dialogues matching the given filters (a None filter means "don't filter")."""
        dialogs = []
        for fname in tqdm(os.listdir(os.path.join(STAR._path, "dialogues/")), desc="Reading dialogs", leave=False):
            if not fname.endswith(".json"):
                continue
            dialog_id = int(os.path.splitext(fname)[0])
            scenario = STAR.get_dialog_scenario(dialog_id)

            if (domain is None or domain in scenario["Domains"]) and \
               (happy is None or scenario["Happy"] == happy) and \
               (multitask is None or scenario["MultiTask"] == multitask) and \
               (task_name is None or any(capability["Task"] == task_name
                                         for capability in scenario["WizardCapabilities"])):
                dialogs.append(STAR.get_dialog(dialog_id))
        return dialogs

    @staticmethod
    def get_dialog_scenario(id):
        """Return the scenario dict of dialogue `id`."""
        with open(os.path.join(STAR._path, f"dialogues/{id}.json")) as reader:
            return json.load(reader)["Scenario"]

    @staticmethod
    def get_dialog_first_turn(id, speaker: str = None):
        """Return the first turn of dialogue `id` (optionally, the first turn by `speaker`).

        Note: works on the raw events, so the speaker is "User" or "Wizard" (not "System").
        """
        with open(os.path.join(STAR._path, f"dialogues/{id}.json")) as reader:
            for event in json.load(reader)["Events"]:
                turn_speaker = event["Agent"]
                if speaker is None and turn_speaker in STAR._speakers:
                    return Turn(speaker=turn_speaker, text=event["Text"])
                elif turn_speaker == speaker:
                    return Turn(speaker=turn_speaker, text=event["Text"])

    @staticmethod
    def get_dialog_task_names(id):
        """Return the names of the tasks involved in dialogue `id`."""
        scenario = STAR.get_dialog_scenario(id)
        return [task["Task"] for task in scenario["WizardCapabilities"]]

    @staticmethod
    def get_dialog_responses(id):
        """Return, for each task of dialogue `id`, its node -> example-response mapping."""
        tasks = STAR.get_dialog_task_names(id)
        return [STAR.read_graph_responses(task, as_dict=True) for task in tasks]

    @staticmethod
    def get_dialog_graphs(id):
        """Return, for each task of dialogue `id`, its raw transition graph."""
        tasks = STAR.get_dialog_task_names(id)
        return [STAR.read_graph(task, as_dot=False) for task in tasks]

    @staticmethod
    def get_dialog_events(id):
        """Return the raw event list of dialogue `id`.

        (This method was accidentally defined twice, identically; the duplicate was removed.)
        """
        with open(os.path.join(STAR._path, f"dialogues/{id}.json")) as reader:
            return json.load(reader)["Events"]

    @staticmethod
    def get_dialog_user_instructions(id):
        """Return {user-turn index: instruction text} for the "UserGuide" instructions of dialogue `id`."""
        def get_user_n_turns_before(turn_ix, events):
            # Number of user utterances that happened before event `turn_ix`.
            return len([e for e in events[:turn_ix]
                        if e["Agent"] == "User" and e["Action"] == "utter"])
        events = STAR.get_dialog_events(id)
        return {get_user_n_turns_before(ix, events): e["Text"]
                for ix, e in enumerate(events)
                if e["Action"] == "instruct" and e["Agent"] == "UserGuide"}

    @staticmethod
    def get_dialog_graphs_and_responses(id):
        """Return (graphs, responses) for all the tasks of dialogue `id`."""
        return STAR.get_dialog_graphs(id), STAR.get_dialog_responses(id)

    @staticmethod
    def get_scenario_description(scenario):
        """Build a natural-language description of a scenario (instructions + task flowcharts)."""
        # Let's generate the graph description for each task:
        flowcharts = ""
        for task in scenario["WizardCapabilities"]:
            task_name = task["Task"]
            flowcharts += f"""
The graph for the task '{task_name}' with domain '{task['Domain']}' is:
```dot
{STAR.read_graph(task_name)}
```
and one example responses for each node is provided in the following json:
```json
{STAR.read_graph_responses(task_name)}
```

---
"""
        # Finally, let's return the scenario object and natural language description for it.
        return f"""The conversation is between a User and a AI assistant in the following domains: {', '.join(scenario['Domains'])}.

The User instructions are: {scenario['UserTask']}
The AI assistant instructions are: {scenario['WizardTask']}

In addition, the AI assistant is instructed to follow specific flowcharts to address the tasks. Flowcharts are defined as graph described using DOT.
The actual DOT for the current tasks are:
{flowcharts}

Finally, the following should be considered regarding the conversation:
1. {"The conversation follows the 'happy path', meaning the conversations goes according to what it is described in the flowcharts"
    if scenario['Happy'] else
    "The conversation does NOT follow a 'happy path', meaning something happend to the user to change its mind or something happend "
    "in the environment for the conversation to not flow as expected, as described in the flowchart"}.
2. {"The user is calling to perform multiple tasks, involving all the tasks defined as flowcharts above (" + ', '.join(task['Task'] for task in scenario['WizardCapabilities']) + ")"
    if scenario['MultiTask'] else
    "The user is calling to perform only the defined task (" + scenario['WizardCapabilities'][0]['Task'] + "), nothing else"}.
"""

    @staticmethod
    def get_dialog_scenario_description(id):
        """Return (scenario, natural-language description) for dialogue `id`."""
        scenario = STAR.get_dialog_scenario(id)
        return scenario, STAR.get_scenario_description(scenario)

    @staticmethod
    def get_user_persona_for_scenario(scenario):
        """Build the `Persona` simulating the user described by the scenario."""
        dialogue_details = f"""
The following should be considered regarding the conversation:
1. {"The conversation follows a 'happy path', meaning the conversations goes smoothly without any unexpected behavior"
    if scenario['Happy'] else
    "The conversation does NOT follow a 'happy path', meaning you have to simulate something happend in the middle of the conversation, "
    "perhaps you changed your mind at some point or something external happend in the environment for the conversation to not flow as expected"}.
2. {"The conversation involves multiple tasks, that is, you want the assistant to perform multiple tasks (" + ', '.join(task['Task'] for task in scenario['WizardCapabilities']) + "), not just one."
    if scenario['MultiTask'] else
    "The conversation involves only one task you were instructed to (" + scenario['WizardCapabilities'][0]['Task'] + "), nothing else"}"""

        return Persona(
            role=f"user calling a AI assistant that can perform multiple tasks in the following domains: {', '.join(scenario['Domains'])}.\n" + dialogue_details,
            circumstances=scenario["UserTask"],
        )

    @staticmethod
    def get_flowchart_description_for_scenario(scenario):
        """Build a markdown description of all the task flowcharts of the scenario."""
        flowcharts = ""
        for task in scenario["WizardCapabilities"]:
            task_name = task["Task"]
            flowcharts += f"""
## {task_name} ({task['Domain']})

The flowchart described as an action transition graph for the task '{task_name}' with domain '{task['Domain']}' is:
```dot
{STAR.read_graph(task_name)}
```
Response example for each action is provided in the following json:
```json
{STAR.read_graph_responses(task_name)}
```
where UPPERCASE words above are just example placeholders. You MUST fill in those with any coherent values in the actual conversation.
"""
        return flowcharts

    @staticmethod
    def get_system_persona_for_scenario(scenario):
        """Build the `Persona` simulating the AI assistant ("Wizard") described by the scenario."""
        dialogue_details = f"""In the conversation, the AI assistant is instructed to follow specific action flowcharts to address the tasks. Flowcharts are defined as graph described using DOT.
The actual DOT for the current tasks are:
{STAR.get_flowchart_description_for_scenario(scenario)}
"""
        return Persona(
            role="AI assistant.\n" + dialogue_details,
            circumstances=scenario['WizardTask'],
        )

    @staticmethod
    def get_agents_for_scenario(scenario, model_name):
        """Return (system, user) `PersonaAgent`s for the given scenario."""
        user = PersonaAgent(model_name,
                            STAR.get_user_persona_for_scenario(scenario),
                            name="User",
                            can_finish=True)

        system = PersonaAgent(model_name,
                              STAR.get_system_persona_for_scenario(scenario),
                              name="System")

        return system, user

    @staticmethod
    def get_agents_from_dialogue(id, model_name: str, set_first_utterance: bool = False):
        """Return (system, user) agents built from the scenario of dialogue `id`.

        :param set_first_utterance: if True, seed the system agent with an opening line
                                    (the original one when the Wizard spoke first).
        """
        scenario = STAR.get_dialog_scenario(id)
        system, user = STAR.get_agents_for_scenario(scenario, model_name)

        if set_first_utterance:
            first_turn = STAR.get_dialog_first_turn(id)
            if first_turn.speaker == "Wizard":
                system.set_first_utterances(first_turn.text)
            else:
                system.set_first_utterances("Hello, how can I help?")

        return system, user

    @staticmethod
    def get_agents_from_dialogue_with_orchestration(id, model_name: str, set_first_utterance: bool = False):
        """Same as `get_agents_from_dialogue`, but with orchestrators attached:
        response suggestions for the system, and the original user instructions for the user."""
        system, user = STAR.get_agents_from_dialogue(id, model_name, set_first_utterance)

        graphs, responses = STAR.get_dialog_graphs_and_responses(id)
        response_action_orchestrator = SimpleResponseOrchestrator(responses[0], graph=graphs[0])
        instr_list_orchestrator = InstructionListOrchestrator(
            STAR.get_dialog_user_instructions(id),
            persistent=True
        )

        return system | response_action_orchestrator, user | instr_list_orchestrator
@@ -0,0 +1,82 @@
1
+ import json
2
+ import random
3
+
4
+ from langchain_ollama.chat_models import ChatOllama
5
+ from langchain_core.messages import HumanMessage, SystemMessage
6
+
7
+ from pydantic import BaseModel
8
+
9
+ from print_color import print
10
+ from typing import Union, List
11
+ from langchain_ollama.chat_models import ChatOllama
12
+ from langchain_core.messages import HumanMessage, SystemMessage
13
+
14
+ from . import Dialog, Turn
15
+
16
+
17
class LLMDialogOutput(BaseModel):
    """Structured output expected from the LLM: the generated dialogue as a list of turns."""

    dialog: List[Turn]  # the generated turns
19
+
20
+
21
+ # TODO: create a BaseDialogGenerator, and also PersonaDialogGenerator that takes personas objects as in multi-agent.
22
class DialogGenerator:
    """Generates a full synthetic dialogue in a single LLM call from a textual description."""

    def __init__(self, model: Union[ChatOllama, str], dialogue_details: str,
                 output_format: Union[dict, BaseModel] = LLMDialogOutput, scenario: dict = None):
        """
        :param model: a ChatOllama instance, or an Ollama model name to instantiate one.
        :param dialogue_details: natural-language description of the dialogue to generate.
        :param output_format: JSON schema (dict) or pydantic model describing the expected output;
                              falsy to get the raw LLM text back.
        :param scenario: optional scenario to populate the "scenario" field of the output;
                         if not provided, `dialogue_details` content will be used.
        """
        if not output_format or isinstance(output_format, dict):
            output_format_schema = output_format
            self.output_format = None
        else:
            output_format_schema = output_format.model_json_schema()
            self.output_format = output_format

        if isinstance(model, str):
            self.llm = ChatOllama(model=model,
                                  format=output_format_schema,
                                  temperature=0.8,
                                  seed=13)
        else:
            self.llm = model
            if output_format:
                # Bug fix: assign the JSON *schema*; the original assigned the
                # (possibly pydantic-class) `output_format` object itself.
                self.llm.format = output_format_schema

        # Keep a printable model name even when a ChatOllama instance was given
        # (Dialog.model is Optional[str], so storing the object would fail validation).
        self.model_name = model if isinstance(model, str) else getattr(model, "model", str(model))
        self.set(dialogue_details, scenario)

    def generate(self, seed: int = None, id: int = None):
        """Generate one dialogue.

        :param seed: generation seed (random if not given).
        :param id: optional dialogId for the resulting `Dialog`.
        :return: a `Dialog`, the raw LLM text, or a validated `output_format` instance.
        """
        self.llm.seed = seed if seed is not None else random.getrandbits(32)

        # Hack to avoid seed bug in prompt cache (forces a new cache,
        # related to https://github.com/ollama/ollama/issues/5321).
        saved_num_predict = self.llm.num_predict
        self.llm.num_predict = 1
        self.llm.invoke(self.messages)
        self.llm.num_predict = saved_num_predict

        dialogue = self.llm.invoke(self.messages).content

        if not self.output_format:
            return dialogue

        llm_output = self.output_format.model_validate(json.loads(dialogue))

        if self.output_format is LLMDialogOutput:
            return Dialog(dialogId=id if id else None,
                          model=self.model_name,
                          seed=self.llm.seed,
                          scenario=self.scenario if self.scenario else self.dialogue_details,
                          turns=llm_output.dialog)
        return llm_output

    def set(self, dialogue_details: str, scenario: dict = None):
        """Set (or reset) the dialogue description and optional scenario used for generation."""
        self.scenario = scenario
        self.dialogue_details = dialogue_details
        self.messages = [
            SystemMessage(
                # NOTE(review): "role paying" is likely a typo for "role playing" in this prompt;
                # kept as-is to preserve runtime behavior.
                "You are a knowledgeable and useful AI assistant that can write natural conversations by role paying different speakers."
                "The output should be a full dialogue, from begining (greetings) to end (bye bye messages)."
            ),
            HumanMessage(content=dialogue_details)
        ]

    __call__ = generate
@@ -0,0 +1,224 @@
1
+ import json
2
+ import random
3
+ import inspect
4
+ import numpy as np
5
+
6
+ from time import time
7
+ from abc import ABC, abstractmethod
8
+ from typing import List, Union, Dict, Optional
9
+ from sentence_transformers import SentenceTransformer
10
+ from langchain_core.messages import SystemMessage, AIMessage
11
+
12
+ from . import Turn, Event, Instruction
13
+ from .util import make_serializable
14
+ # from .personas import PersonaAgent
15
+
16
+
17
class BaseOrchestrator(ABC):
    """Base class for orchestrators: objects attached to an agent that inspect the
    dialogue so far and may inject an instruction into the conversation."""

    _target = None  # the agent being orchestrated
    _event_label = None  # label used for the generated events
    _persistent = False  # whether the instruction persists across turns

    def __init__(self, target_agent=None, persistent: bool = None, event_label: str = None):
        self._target = target_agent
        self._persistent = persistent
        self._event_label = event_label

    def __call__(self):
        """Compute the instruction for the current state of the target agent's dialogue."""
        dialog = self.__get_current_dialog()
        # Pass the last utterance only when it was said by the other party.
        last_utterance = ""
        if dialog and dialog[-1].speaker != self._target.get_name():
            last_utterance = dialog[-1].text
        return self.instruct(dialog, last_utterance)

    def __str__(self) -> str:
        data = self.json()
        formatted_args = " ".join(f"{key}={value}" for key, value in data["args"].items())
        return f"{data['name']}({formatted_args})"

    def __get_current_dialog(self) -> List[Turn]:
        # Rebuild the dialogue from the target agent's message memory:
        # AI messages belong to the target agent, system messages are skipped.
        return [Turn(speaker=self._target.get_name() if type(message) == AIMessage else None, text=message.content)
                for message in self._target.memory if type(message) != SystemMessage]

    def _set_target_agent(self, agent):  # agent: PersonaAgent
        self._target = agent

    def json(self, string: bool = False, indent: int = None):
        """Serializable description of this orchestrator: its class name and constructor args."""
        sig = inspect.signature(self.__init__)
        data = {"name": type(self).__name__,
                "args": {key: self.__dict__[key] for key in sig.parameters
                         if key in self.__dict__ and self.__dict__[key] is not None}}
        make_serializable(data["args"])
        return json.dumps(data, indent=indent) if string else data

    def get_event_label(self) -> str:
        """Label for events generated by this orchestrator (defaults to the class name)."""
        return self._event_label if self._event_label else type(self).__name__

    def get_target_agent(self):
        """The agent this orchestrator is attached to."""
        return self._target

    def is_persistent(self):
        """Whether the produced instruction persists across turns."""
        return self._persistent

    def set_persistent(self, value: bool):
        self._persistent = value

    def agent_response_lookahead(self):
        """Ask the target agent what it would answer next, without committing to it."""
        return self._target.response_lookahead()

    @abstractmethod
    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        """Return the instruction for the agent given the dialogue so far (or None)."""
        pass

    def reset(self):
        """Reset internal state (no-op by default)."""
        pass
72
+
73
+
74
class BasePersistentOrchestrator(BaseOrchestrator):
    """Base class for orchestrators whose instructions always persist across turns."""

    _persistent = True  # persistent by default, unlike BaseOrchestrator

    @abstractmethod
    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        """Return the instruction for the agent given the dialogue so far (or None)."""
        pass

    def reset(self):
        """Reset internal state (no-op by default)."""
        pass
83
+
84
+
85
class SimpleReflexOrchestrator(BaseOrchestrator):
    """Fires a fixed instruction whenever a predicate holds for the last utterance."""

    def __init__(self, condition: callable, instruction: str, persistent: bool = False, event_label: str = None):
        """
        :param condition: predicate over the last utterance text.
        :param instruction: the instruction to emit when the predicate is true.
        """
        super().__init__(persistent=persistent, event_label=event_label)
        self.condition = condition
        self.instruction = instruction

    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        # None (i.e. "no instruction") when the condition does not hold.
        return self.instruction if self.condition(utterance) else None
94
+
95
+
96
class LengthOrchestrator(BaseOrchestrator):
    """Nudges the agent to keep talking until `min` turns and to wrap up near `max` turns."""

    def __init__(self, min: int = None, max: int = None, persistent: bool = False, event_label: str = None):
        super().__init__(persistent=persistent, event_label=event_label)
        self.max = max
        self.min = min

    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        # Too short (but past the opening turn): keep the conversation alive.
        if self.min is not None and 1 < len(dialog) < self.min:
            return "Make sure you DO NOT finish the conversation, keep it going!"
        # About to exceed the maximum (counting the upcoming answer): wrap up.
        if self.max and len(dialog) >= self.max - 1:
            return "Now FINISH the conversation AS SOON AS possible, if possible, RIGHT NOW!"
107
+
108
+
109
class ChangeMindOrchestrator(BaseOrchestrator):
    """Randomly instructs the agent to change its mind, at most `max_times` per dialogue."""

    def __init__(self, probability: float = 0.3,
                 reasons: Union[str, List[str]] = None,
                 max_times: int = 1,
                 persistent: bool = False,
                 event_label: str = None):
        """
        :param probability: chance of firing at each turn.
        :param reasons: optional reason(s) to append to the instruction (one picked at random).
        :param max_times: maximum number of times to fire (0/None for unlimited).
        """
        super().__init__(persistent=persistent, event_label=event_label)
        self.probability = probability
        self.reasons = [reasons] if type(reasons) == str else reasons
        self.max_times = max_times
        self.times = 0  # how many times we already fired

    def reset(self):
        """Allow the orchestrator to fire again in a new dialogue."""
        self.times = 0

    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        # Guard clauses: quota exhausted, or the coin flip says no.
        if self.max_times and self.times >= self.max_times:
            return None
        if random.random() > self.probability:
            return None

        self.times += 1
        instruction = "Change your mind completely, in your next utterance, suggest something completely different!"
        if self.reasons:
            instruction += f" **Reason:** {random.choice(self.reasons)}."
        return instruction
134
+
135
+
136
class SimpleResponseOrchestrator(BaseOrchestrator):
    """Suggests next responses to the agent by semantic similarity against a response catalog.

    When `responses` is a dict mapping action -> example response (optionally with a
    transition `graph`), suggestions are expressed as actions; otherwise they are
    plain candidate utterances.
    """

    def __init__(self,
                 responses: List[Union[str, Dict[str, str]]],
                 graph: Dict[str, str] = None,
                 sbert_model: str = "sergioburdisso/dialog2flow-joint-bert-base",
                 top_k: int = 5):
        """
        :param responses: list of utterances, or dict action -> example response.
        :param graph: optional action -> next-action transition map (dict responses only).
        :param sbert_model: sentence-transformers model used for the similarity search.
        :param top_k: number of suggestions to surface.
        """
        self.sent_encoder = SentenceTransformer(sbert_model)
        self.responses = responses
        self.top_k = top_k

        if type(responses) == dict:
            self.resp_utts = np.array([resp for resp in responses.values()])
            self.resp_acts = np.array([act for act in responses.keys()])
            self.graph = graph
        else:
            self.resp_utts = np.array(responses)
            self.resp_acts = None
            self.graph = None

        # Embed the whole catalog once, up front.
        self.resp_utt_embs = self.sent_encoder.encode(self.resp_utts)

    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        agent = self.get_target_agent()

        # Anchor the similarity search on the agent's last turn when a graph is
        # available; otherwise on a lookahead of what the agent would say next.
        agent_last_turn = None
        if self.graph and dialog:
            for turn in dialog[::-1]:
                if turn.speaker == agent.get_name():
                    agent_last_turn = turn.text
                    break

        response = agent_last_turn if agent_last_turn else agent.response_lookahead()

        events = [Event(agent=agent.get_name(),
                        action="request_suggestions",
                        actionLabel=self.get_event_label(),
                        text=f'Previous response: "{response}"' if agent_last_turn else f'Lookahead response: "{response}"',
                        timestamp=int(time()))]

        sims = self.sent_encoder.similarity(self.sent_encoder.encode(response), self.resp_utt_embs)[0]
        top_k_ixs = sims.argsort(descending=True)[:self.top_k]

        if self.resp_acts is None:
            instruction = ("If applicable, try to pick your next response from the following list: " +
                           "; ".join(f'({ix + 1}) {resp}' for ix, resp in enumerate(self.resp_utts[top_k_ixs])))
        else:
            next_actions = self.resp_acts[top_k_ixs].tolist()
            events.append(Event(agent=agent.get_name(),
                                action="request_suggestions",
                                actionLabel=self.get_event_label(),
                                text="Actions for the response: " + ", ".join(action for action in next_actions),
                                timestamp=int(time())))
            if agent_last_turn:
                # Follow the graph one step: map each matched action to its successor.
                next_actions = [self.graph[action] if action in self.graph else action
                                for action in next_actions]
                events.append(Event(agent=agent.get_name(),
                                    action="request_suggestions",
                                    actionLabel=self.get_event_label(),
                                    text="Graph next actions: " + ", ".join(action for action in next_actions),
                                    timestamp=int(time())))

            # TODO: remove repeated actions! (make it a set()?)
            next_actions = [action for action in next_actions if action in self.responses]
            instruction = ("If applicable, pick your next response from the following action list in order of importance: " +
                           "; ".join(f'({ix + 1}) Action: {action}. Response: "{self.responses[action]}"'
                                     for ix, action in enumerate(next_actions)))

        return Instruction(text=instruction, events=events)
205
+
206
+
207
class InstructionListOrchestrator(BaseOrchestrator):
    """Feeds the agent a pre-defined sequence of instructions, one per agent turn.

    `instructions` is either a list (instruction i is delivered at the agent's i-th turn)
    or a dict mapping turn index -> instruction (sparse delivery).
    """

    def __init__(self,
                 # Fixed annotation: the original `List[Union[str, Dict[int, str]]]` described
                 # a list of mixed items, but the code treats the whole collection as
                 # either a list of strings or a dict of index -> string.
                 instructions: Union[List[str], Dict[int, str]],
                 persistent: bool = False):
        super().__init__(persistent=persistent)
        self.instructions = instructions

    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        agent = self.get_target_agent()

        # Number of turns the target agent has already taken.
        current_user_len = len([t for t in dialog if t.speaker == agent.get_name()]) if dialog else 0

        if (isinstance(self.instructions, dict) and current_user_len in self.instructions) or \
           (isinstance(self.instructions, list) and current_user_len < len(self.instructions)):
            return self.instructions[current_user_len]
@@ -0,0 +1,299 @@
1
+ import json
2
+ import random
3
+
4
+ from time import time
5
+ from tqdm.auto import trange
6
+ from print_color import print
7
+ from typing import List, Union, Optional
8
+ from langchain_ollama.chat_models import ChatOllama
9
+ # from langchain_core.prompts import PromptTemplate
10
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
11
+
12
+ from . import Dialog, Turn, Event, Instruction
13
+ from .orchestrators import BaseOrchestrator
14
+ from .util import make_serializable
15
+
16
+
17
class Meta(type):
    """Metaclass that chains ``__init__`` calls up the class hierarchy.

    Every class created with this metaclass gets its ``__init__`` wrapped so
    that instantiating it first runs each direct base class's ``__init__``
    (with the same arguments) and then the class's own one. This lets
    ``BasePersona`` subclasses omit ``__init__`` boilerplate.
    """

    def __init__(cls, name, bases, dct):
        def auto__call__init__(self, *a, **kw):
            for base in cls.__bases__:
                # Skip `object`: calling object.__init__ with extra arguments
                # raises TypeError whenever __init__ is overridden, which
                # broke any instantiation with kwargs (e.g. Persona(name=...)).
                if base is not object:
                    base.__init__(self, *a, **kw)
            cls.__init__child_(self, *a, **kw)
        # Keep a handle on the class's original __init__, then install the wrapper.
        cls.__init__child_ = cls.__init__
        cls.__init__ = auto__call__init__
25
+
26
+
27
class BasePersona(metaclass=Meta):
    """Base persona: a bag of arbitrary attributes set via keyword arguments."""

    def __init__(self, **kwargs):
        # Every keyword argument becomes an instance attribute.
        self.__dict__.update(kwargs)

    def description(self) -> str:
        """Render one "Your <attribute>: <value>" line per instance attribute."""
        lines = [f"Your {key}: {value}" for key, value in self.__dict__.items()]
        return "\n".join(lines)

    def __str__(self) -> str:
        return self.description()

    def json(self, string: bool = False, indent=None):
        """Return the persona as a JSON-serializable dict, or a JSON string
        when `string` is True."""
        data = dict(self.__dict__)
        make_serializable(data)
        if string:
            return json.dumps(data, indent=indent)
        return data
41
+
42
+
43
class Persona(BasePersona):
    """Generic role-play persona with standard descriptive fields.

    NOTE(review): these are class-level annotated defaults; instances only get
    their own attributes via ``BasePersona.__init__(**kwargs)``, so fields not
    passed at construction time do not appear in ``description()``.
    """
    # Character's name.
    name: str = ""
    # Occupation / function in the scenario (e.g. "doctor", "customer").
    role: str = ""
    # Personal history or context relevant to the dialogue.
    background: str = ""
    # Character traits shaping the tone of responses.
    personality: str = ""
    # Current situation driving the conversation.
    circumstances: str = ""
    # Behavioral constraints the character must obey.
    rules: str = ""
51
+
52
class PersonaAgent:
    """LLM-backed conversational agent that role-plays a persona.

    The whole conversation is kept as a list of LangChain messages in
    ``self.memory`` (system prompt always at index 0). Orchestrators may
    inject extra system instructions before each turn, and two agents can
    hold a full conversation via :meth:`dialog_with`.
    """

    # Marker the LLM is instructed to emit to signal the end of the dialogue.
    STOP_WORD = "STOP"
    # Human-readable replacement for STOP_WORD in utterances returned to callers.
    STOP_WORD_TEXT = "(bye bye!)"

    def __init__(self,
                 model: Union[str, ChatOllama],
                 persona: BasePersona = Persona(),
                 name: str = None,
                 dialogue_details: str = "",
                 response_details: str = "responses SHOULD NOT be too long and wordy, should be approximately one utterance long",
                 system_prompt: str = None,
                 can_finish: bool = False,
                 orchestrators: Union[BaseOrchestrator, List[BaseOrchestrator]] = None,
                 scenario: Union[dict, str] = None):
        """Create a persona agent.

        :param model: Ollama model name, or an already-built ChatOllama instance.
        :param persona: persona the agent role-plays (rendered into the prompt).
        :param name: agent name; defaults to ``persona.name`` when available.
        :param dialogue_details: optional description of the overall dialogue.
        :param response_details: optional constraints on each response's style.
        :param system_prompt: full system prompt; when given, the persona /
                              details arguments above are ignored for prompting.
        :param can_finish: if True, the prompt tells the model to emit
                           STOP_WORD to terminate the conversation.
        :param orchestrators: orchestrator(s) to attach to this agent.
        :param scenario: scenario metadata stored in generated Dialog objects.
        """
        # NOTE(review): `persona=Persona()` is a shared mutable default
        # instance — fine only as long as personas are treated as read-only.
        if not system_prompt:
            if can_finish:
                conversation_end_instructions = f"To finish the conversation you first have to say good bye and immediately after you **MUST** output '{self.STOP_WORD}' to indicate it is the end of it."
            else:
                conversation_end_instructions = "When the user finish the conversation you should say good bye and also finish the conversation"

            # NOTE(review): "BEGING" below is a typo in the prompt literal; it
            # is runtime prompt text, so it is deliberately left unchanged.
            # system_prompt = prompt_template.format(role=role, ...)
            system_prompt = f"""Role play as a character that is described by the persona defined in the following lines. You always stay in character.
[[ ## BEGING PERSONA ## ]]
{persona}
[[ ## END PERSONA ## ]]
---
{"Details about the overall dialogue: " + dialogue_details if dialogue_details else ""}
{"Details about your responses: " + response_details if response_details else ""}
Finally, remember:
1. You always stay on character. You are the character described above.
2. Your first utterance / turn MUST always be a short generic greeting (e.g. "Hello, how are you?", "Hi!", "hey! what's up?", etc.), and nothing else, wait for a reply before start with the actual conversation.
3. {conversation_end_instructions}."""

        if type(model) == str:
            # TODO: ChatHuggingFace
            # Hard-coded sampling defaults; the seed is reassigned on reset().
            self.llm = ChatOllama(model=model,
                                  temperature=0.8,
                                  seed=13)
        else:
            self.llm = model
        # Conversation history; index 0 is always the system prompt.
        self.memory = [SystemMessage(system_prompt)]

        self.name = name if name else (persona.name if hasattr(persona, "name") else None)
        self.persona = persona
        self.model_name = str(self.llm)
        self.first_utterances = None   # optional canned opening line(s)
        self.finished = False          # set True once STOP_WORD is observed
        self.scenario = scenario
        self.orchestrators = None
        self.add_orchestrators(orchestrators)

    def __call__(self, utterance: str = "", return_events: bool = False) -> Union[str, List[Event], None]:
        """Take one turn: ingest `utterance`, apply orchestrators, and reply.

        :param utterance: the interlocutor's last utterance ("" for none).
        :param return_events: if True, return the Events produced this turn
                              (instruction events plus the final "utter"
                              event) instead of the plain response string.
        :return: response string (or event list); None once finished.
        """
        if self.finished:
            return None

        if utterance:
            self.memory.append(HumanMessage(content=utterance))

        if return_events: events = []
        if self.orchestrators:
            # Each orchestrator may contribute an instruction for this turn.
            for orchestrator in self.orchestrators:
                instruction = orchestrator()
                if instruction:

                    # Orchestrators may return either a plain string or an
                    # Instruction object carrying its own event(s).
                    if type(instruction) == Instruction:
                        if return_events and instruction.events:
                            if type(instruction.events) == Event: events.append(instruction.events)
                            else: events.extend(instruction.events)
                        instruction = instruction.text

                    persist = orchestrator.is_persistent()
                    self.instruct(instruction, persist=persist)
                    if return_events:
                        events.append(Event(agent=self.get_name(),
                                            action="instruct" + ("-persist" if persist else ""),
                                            actionLabel=orchestrator.get_event_label(),
                                            text=instruction,
                                            timestamp=int(time())))

        # Very first turn with canned openings: pick one instead of the LLM.
        if len(self.memory) <= 1 and self.first_utterances:
            response = random.choice(self.first_utterances) if type(self.first_utterances) == list else self.first_utterances
            response = AIMessage(content=response)
        else:
            response = self.llm.invoke(self.memory)

        if self.orchestrators:
            # Drop non-persistent instructions now that they were consumed.
            self.memory[:] = [msg for msg in self.memory
                              if not (msg.response_metadata and "persist" in msg.response_metadata and not msg.response_metadata["persist"])]
        self.memory.append(response)

        response = response.content
        if self.STOP_WORD in response:
            # Replace the stop marker in the returned text, scrub it from
            # memory, and mark the conversation as finished.
            response = response.replace(self.STOP_WORD, self.STOP_WORD_TEXT).strip()
            self.memory[-1].content = self.memory[-1].content.replace(self.STOP_WORD, "").strip()
            self.finished = True

        if return_events:
            if response:
                events.append(Event(agent=self.get_name(),
                                    action="utter",
                                    text=response,
                                    timestamp=int(time())))
            return events
        else:
            return response if response else ""

    def __or__(self, orchestrator: Union[BaseOrchestrator, List[BaseOrchestrator]]):
        """``agent | orchestrator`` attaches orchestrator(s), returns the agent."""
        self.add_orchestrators(orchestrator)
        return self

    def response_lookahead(self, utterance: str = None):
        """Preview the reply to `utterance` without modifying the memory."""
        if not utterance:
            return self.llm.invoke(self.memory).content
        return self.llm.invoke(self.memory + [HumanMessage(utterance)]).content

    def add_orchestrators(self, orchestrators):
        """Attach one orchestrator or a list of them to this agent."""
        if not orchestrators:
            return

        if self.orchestrators == None:
            self.orchestrators = []

        if isinstance(orchestrators, BaseOrchestrator):
            orchestrators = [orchestrators]

        self.orchestrators.extend(orchestrators)

        # Let each orchestrator know which agent it is steering.
        for orchestrator in orchestrators:
            orchestrator._set_target_agent(self)

    def clear_orchestrators(self):
        """Detach all orchestrators."""
        self.orchestrators = None

    def instruct(self, instruction: str, persist: bool = False):
        """Append a system instruction to memory; non-persistent ones are
        removed right after the next response is generated."""
        self.memory.append(SystemMessage(instruction, response_metadata={"persist": persist}))

    def set_first_utterances(self, utterances: Union[str, List[str]]):
        """Set canned opening utterance(s) used for the very first turn."""
        self.first_utterances = utterances

    def get_name(self):
        """Return the agent's name (may be None)."""
        return self.name

    def get_prompt(self):
        """Return the system prompt (first message in memory)."""
        return self.memory[0].content

    def json(self, string: bool = False, indent=None):
        """Serialize the agent's configuration (not the conversation) to a
        dict, or to a JSON string when `string` is True."""
        data = {}
        if self.name:
            data["name"] = self.name
        data["model_name"] = self.model_name
        if self.first_utterances:
            data["first_utterances"] = self.first_utterances
        data["persona"] = self.persona.json()
        if self.orchestrators:
            # NOTE(review): orchestrators are nested under "persona" — looks
            # intentional for the scenario schema; confirm against consumers.
            data["persona"]["orchestrators"] = [orc.json() for orc in self.orchestrators]
        return json.dumps(data, indent=indent) if string else data

    def reset(self, seed: int = None):
        """Clear the conversation (keeping the system prompt) and re-seed the LLM."""
        self.memory[:] = self.memory[:1]
        self.finished = False
        self.llm.seed = seed

        if self.orchestrators:
            for orchestrator in self.orchestrators:
                orchestrator.reset()

        # hack to avoid seed bug in prompt cache (to force a new cache, related to https://github.com/ollama/ollama/issues/5321)
        _ = self.llm.num_predict
        self.llm.num_predict = 1
        self.llm.invoke(self.memory)
        self.llm.num_predict = _

    def dialog_with(self,
                    persona: "PersonaAgent",
                    max_iterations: int = 20,
                    id: int = None,
                    seed: int = None,
                    keep_bar: bool = True):
        """Run a full dialogue between `self` (first speaker) and `persona`.

        :param persona: the other PersonaAgent.
        :param max_iterations: maximum number of exchange rounds.
        :param id: optional dialogue id stored in the returned Dialog.
        :param seed: RNG / LLM seed; a random one is drawn when None.
        :param keep_bar: whether to keep the tqdm progress bar on screen.
        :return: a Dialog with turns, events, and scenario metadata.
        """
        seed = seed if seed is not None else random.getrandbits(32)

        random.seed(seed)
        self.reset(seed)
        persona.reset(seed)

        dialog = []
        events = []

        utter = None
        completion = False
        tqdm_iterator = trange(max_iterations, desc="Dialogue", leave=keep_bar)
        for _ in tqdm_iterator:
            # Our turn: feed the other agent's last utterance, collect events.
            utt_events = self(utter, return_events=True)

            if utt_events and utt_events[-1].action == "utter":
                utter = utt_events[-1].text
                utt_events[-1].text = utter.replace(self.STOP_WORD_TEXT, "").strip()
                if not utt_events[-1].text: break
            else:
                # No utterance event means the agent ended the conversation.
                completion = True
                break

            dialog.append(Turn(
                speaker=self.get_name() if self.get_name() else "Me",
                text=utt_events[-1].text
            ))
            events.extend(utt_events)

            # The other agent's turn, symmetric to the above.
            utt_events = persona(utter, return_events=True)
            if utt_events and utt_events[-1].action == "utter":
                utter = utt_events[-1].text
                utt_events[-1].text = utter.replace(self.STOP_WORD_TEXT, "").strip()
                if not utt_events[-1].text: break
            else:
                completion = True
                break

            dialog.append(Turn(
                speaker=persona.get_name() if persona.get_name() else "Other",
                text=utt_events[-1].text
            ))
            events.extend(utt_events)

        if not keep_bar:
            tqdm_iterator.container.close()

        if self.scenario:
            scenario = self.scenario
        else:
            # Default scenario: a snapshot of both agents' configurations.
            scenario = {
                "agents": [
                    self.json(),
                    persona.json()
                ]
            }

        return Dialog(
            dialogId=id if id else None,
            complete=completion,  # incomplete if ran out of iterations (reached max_iteration number)
            model=self.model_name,
            seed=seed,
            scenario=scenario,
            turns=dialog,
            events=events
        )

    # Convenience alias.
    talk_with = dialog_with
@@ -0,0 +1,11 @@
1
+ import json
2
+
3
def make_serializable(data: dict) -> dict:
    """Make every value in *data* JSON-serializable, modifying it in place.

    Values that ``json.dumps`` rejects are repaired: dictionaries are fixed
    recursively (so only the offending leaves are stringified, preserving the
    nested structure), anything else falls back to its ``str()`` form.

    :param data: dictionary to sanitize (mutated in place).
    :return: the same dictionary, for call-chaining convenience.
    """
    for key, value in data.items():
        try:
            json.dumps(value)
        except (TypeError, OverflowError):
            if isinstance(value, dict):
                # Recurse instead of stringifying the whole sub-dict, which
                # previously destroyed the nested structure.
                make_serializable(value)
            else:
                data[key] = str(value)

    return data
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: sdialog
3
+ Version: 0.0.1
4
+ Summary: Synthetic Dialogue Generation and Analysis
5
+ Author-email: Sergio Burdisso <sergio.burdisso@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/idiap/sdialog
8
+ Project-URL: Issues, https://github.com/idiap/sdialog/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: print-color
15
+ Requires-Dist: langchain
16
+ Requires-Dist: langchain-ollama
17
+ Requires-Dist: tqdm
18
+ Requires-Dist: plotly
19
+ Requires-Dist: sentence-transformers
20
+ Requires-Dist: pandas
21
+ Requires-Dist: tenacity
22
+ Requires-Dist: numpy
23
+ Requires-Dist: flake8
24
+ Requires-Dist: pytest
25
+ Requires-Dist: ollama
26
+ Dynamic: license-file
27
+
28
+ # SDialog
29
+
30
+ Synthetic Dialogue Generation and Analysis
31
+
32
+ _(Coming soon)_
33
+
34
+ This package requires `Ollama` running on your system.
@@ -0,0 +1,15 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ requirements.txt
5
+ src/sdialog/__init__.py
6
+ src/sdialog/datasets.py
7
+ src/sdialog/generators.py
8
+ src/sdialog/orchestrators.py
9
+ src/sdialog/personas.py
10
+ src/sdialog/util.py
11
+ src/sdialog.egg-info/PKG-INFO
12
+ src/sdialog.egg-info/SOURCES.txt
13
+ src/sdialog.egg-info/dependency_links.txt
14
+ src/sdialog.egg-info/requires.txt
15
+ src/sdialog.egg-info/top_level.txt
@@ -0,0 +1,12 @@
1
+ print-color
2
+ langchain
3
+ langchain-ollama
4
+ tqdm
5
+ plotly
6
+ sentence-transformers
7
+ pandas
8
+ tenacity
9
+ numpy
10
+ flake8
11
+ pytest
12
+ ollama
@@ -0,0 +1 @@
1
+ sdialog