janus-llm 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +9 -1
- janus/__main__.py +4 -0
- janus/_tests/test_cli.py +128 -0
- janus/_tests/test_translate.py +49 -7
- janus/cli.py +530 -46
- janus/converter.py +50 -19
- janus/embedding/_tests/test_collections.py +2 -8
- janus/embedding/_tests/test_database.py +32 -0
- janus/embedding/_tests/test_vectorize.py +9 -4
- janus/embedding/collections.py +49 -6
- janus/embedding/embedding_models_info.py +120 -0
- janus/embedding/vectorize.py +53 -62
- janus/language/_tests/__init__.py +0 -0
- janus/language/_tests/test_combine.py +62 -0
- janus/language/_tests/test_splitter.py +16 -0
- janus/language/binary/_tests/test_binary.py +16 -1
- janus/language/binary/binary.py +10 -3
- janus/language/block.py +31 -30
- janus/language/combine.py +26 -34
- janus/language/mumps/_tests/test_mumps.py +2 -2
- janus/language/mumps/mumps.py +93 -9
- janus/language/naive/__init__.py +4 -0
- janus/language/naive/basic_splitter.py +14 -0
- janus/language/naive/chunk_splitter.py +26 -0
- janus/language/naive/registry.py +13 -0
- janus/language/naive/simple_ast.py +18 -0
- janus/language/naive/tag_splitter.py +61 -0
- janus/language/splitter.py +168 -74
- janus/language/treesitter/_tests/test_treesitter.py +9 -6
- janus/language/treesitter/treesitter.py +37 -13
- janus/llm/model_callbacks.py +177 -0
- janus/llm/models_info.py +134 -70
- janus/metrics/__init__.py +8 -0
- janus/metrics/_tests/__init__.py +0 -0
- janus/metrics/_tests/reference.py +2 -0
- janus/metrics/_tests/target.py +2 -0
- janus/metrics/_tests/test_bleu.py +56 -0
- janus/metrics/_tests/test_chrf.py +67 -0
- janus/metrics/_tests/test_file_pairing.py +59 -0
- janus/metrics/_tests/test_llm.py +91 -0
- janus/metrics/_tests/test_reading.py +28 -0
- janus/metrics/_tests/test_rouge_score.py +65 -0
- janus/metrics/_tests/test_similarity_score.py +23 -0
- janus/metrics/_tests/test_treesitter_metrics.py +110 -0
- janus/metrics/bleu.py +66 -0
- janus/metrics/chrf.py +55 -0
- janus/metrics/cli.py +7 -0
- janus/metrics/complexity_metrics.py +208 -0
- janus/metrics/file_pairing.py +113 -0
- janus/metrics/llm_metrics.py +202 -0
- janus/metrics/metric.py +466 -0
- janus/metrics/reading.py +70 -0
- janus/metrics/rouge_score.py +96 -0
- janus/metrics/similarity.py +53 -0
- janus/metrics/splitting.py +38 -0
- janus/parsers/_tests/__init__.py +0 -0
- janus/parsers/_tests/test_code_parser.py +32 -0
- janus/parsers/code_parser.py +24 -253
- janus/parsers/doc_parser.py +169 -0
- janus/parsers/eval_parser.py +80 -0
- janus/parsers/reqs_parser.py +72 -0
- janus/prompts/prompt.py +103 -30
- janus/translate.py +636 -111
- janus/utils/_tests/__init__.py +0 -0
- janus/utils/_tests/test_logger.py +67 -0
- janus/utils/_tests/test_progress.py +20 -0
- janus/utils/enums.py +56 -3
- janus/utils/progress.py +56 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/METADATA +23 -10
- janus_llm-2.0.0.dist-info/RECORD +94 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/WHEEL +1 -1
- janus_llm-1.0.0.dist-info/RECORD +0 -48
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/LICENSE +0 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,72 @@
+import json
+import re
+
+from langchain.output_parsers.json import parse_json_markdown
+from langchain.schema.output_parser import BaseOutputParser
+from langchain_core.exceptions import OutputParserException
+from langchain_core.messages import AIMessage
+
+from ..language.block import CodeBlock
+from ..utils.logger import create_logger
+from .code_parser import JanusParser
+
+log = create_logger(__name__)
+
+
+class RequirementsParser(BaseOutputParser[str], JanusParser):
+    block_name: str = ""
+
+    def __init__(self):
+        super().__init__(expected_keys=[])
+
+    def set_reference(self, block: CodeBlock):
+        self.block_name = block.name
+
+    def parse(self, text: str) -> str:
+        if isinstance(text, AIMessage):
+            text = text.content
+        text = text.lstrip("```json")
+        text = text.rstrip("```")
+        try:
+            obj = parse_json_markdown(text)
+        except json.JSONDecodeError as e:
+            log.debug(f"Invalid JSON object. Output:\n{text}")
+            raise OutputParserException(f"Got invalid JSON object. Error: {e}")
+
+        if not isinstance(obj, dict):
+            raise OutputParserException(
+                f"Got invalid return object. Expected a dictionary, but got {type(obj)}"
+            )
+        return json.dumps(obj)
+
+    def parse_combined_output(self, text: str):
+        """Parse the output text from the LLM when multiple inputs are combined.
+
+        Arguments:
+            text: The output text from the LLM.
+
+        Returns:
+            A parsed version of the text.
+        """
+        json_strings = re.findall(r"\{.*?\}", text)
+        output_list = list()
+        for i, json_string in enumerate(json_strings, 1):
+            json_dict = json.loads(json_string)
+            output_list.append(json_dict["requirements"])
+        return output_list
+
+    def get_format_instructions(self) -> str:
+        """Get the format instructions for the parser.
+
+        Returns:
+            The format instructions for the LLM.
+        """
+        return (
+            "Output must contain an ieee style requirements specification "
+            "all in a json-formatted string, including the following field: "
+            '"requirements".'
+        )
+
+    @property
+    def _type(self) -> str:
+        return self.__class__.name
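For orientation, the following is a minimal, self-contained sketch of what the new parser above does with a fenced JSON response. It mirrors the parse() and parse_combined_output() logic using only the standard library; the sample LLM output is invented, and nothing here calls the janus API.

import json
import re

# Invented sample of an LLM response wrapped in a ```json fence.
llm_output = """```json
{"requirements": ["Req 1", "Req 2"]}
```"""

# parse(): strip the markdown fence, decode, and require a dict
stripped = llm_output.lstrip("```json").rstrip("```")
obj = json.loads(stripped)
assert isinstance(obj, dict)
single = json.dumps(obj)

# parse_combined_output(): pull each {...} object out of a concatenated
# response and collect its "requirements" field
combined = single + "\n" + single
requirements = [
    json.loads(match)["requirements"] for match in re.findall(r"\{.*?\}", combined)
]
print(requirements)  # [['Req 1', 'Req 2'], ['Req 1', 'Req 2']]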
janus/prompts/prompt.py
CHANGED
@@ -1,15 +1,14 @@
 import json
+from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import List

+from langchain import PromptTemplate
 from langchain.prompts import ChatPromptTemplate
 from langchain.prompts.chat import (
     HumanMessagePromptTemplate,
     SystemMessagePromptTemplate,
 )
-from langchain.schema.messages import BaseMessage

-from ..language.block import CodeBlock
 from ..utils.enums import LANGUAGES
 from ..utils.logger import create_logger

@@ -18,12 +17,13 @@ log = create_logger(__name__)

 # Prompt names (self.template_map keys) that should output text,
 # regardless of the `output-lang` argument.
-TEXT_OUTPUT = [
+TEXT_OUTPUT = []
+
 # Prompt names (self.template_map keys) that should output the
 # same language as the input, regardless of the `output-lang` argument.
 SAME_OUTPUT = ["document_inline"]

-JSON_OUTPUT = ["evaluate"]
+JSON_OUTPUT = ["evaluate", "document", "document_madlibs", "requirements"]

 # Directory containing Janus prompt template directories and files
 JANUS_PROMPT_TEMPLATES_DIR = Path(__file__).parent / "templates"
@@ -34,7 +34,7 @@ HUMAN_PROMPT_TEMPLATE_FILENAME = "human.txt"
 PROMPT_VARIABLES_FILENAME = "variables.json"


-class PromptEngine:
+class PromptEngine(ABC):
     """A class defining prompting schemes for the LLM."""

     def __init__(
@@ -59,22 +59,14 @@
         template_path = self.get_prompt_template_path(prompt_template)
         self._template_path = template_path
         self._template_name = prompt_template
-        system_prompt_path = SystemMessagePromptTemplate.from_template(
-            (template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME).read_text()
-        )
-        human_prompt_path = HumanMessagePromptTemplate.from_template(
-            (template_path / HUMAN_PROMPT_TEMPLATE_FILENAME).read_text()
-        )
-        self.prompt = ChatPromptTemplate.from_messages(
-            [system_prompt_path, human_prompt_path]
-        )
+        self.prompt = self.load_prompt_template(template_path)

         # Define variables to be passed in to the prompt formatter
         source_language = source_language.lower()
         target_language = target_language.lower()
         self.variables = dict(
-            SOURCE_LANGUAGE=source_language
-            TARGET_LANGUAGE=target_language
+            SOURCE_LANGUAGE=source_language,
+            TARGET_LANGUAGE=target_language,
             TARGET_LANGUAGE_VERSION=str(target_version),
             FILE_SUFFIX=LANGUAGES[source_language]["suffix"],
             SOURCE_CODE_EXAMPLE=LANGUAGES[source_language]["example"],
@@ -83,20 +75,11 @@
         variables_path = template_path / PROMPT_VARIABLES_FILENAME
         if variables_path.exists():
             self.variables.update(json.loads(variables_path.read_text()))
+        self.prompt = self.prompt.partial(**self.variables)

-
-
-
-        Arguments:
-            code: The code block to convert.
-
-        Returns:
-            The converted prompt as a list of messages.
-        """
-        return self.prompt.format_prompt(
-            SOURCE_CODE=code.text,
-            **self.variables,
-        ).to_messages()
+    @abstractmethod
+    def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
+        pass

     @staticmethod
     def get_prompt_template_path(template_name: str) -> Path:
@@ -146,3 +129,93 @@
             f"Specified prompt template directory {template_path} is "
             f"missing a {HUMAN_PROMPT_TEMPLATE_FILENAME}"
         )
+
+
+class ChatGptPromptEngine(PromptEngine):
+    def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
+        system_prompt_path = template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME
+        system_prompt = system_prompt_path.read_text()
+        system_message = SystemMessagePromptTemplate.from_template(system_prompt)
+
+        human_prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
+        human_prompt = human_prompt_path.read_text()
+        human_message = HumanMessagePromptTemplate.from_template(human_prompt)
+        return ChatPromptTemplate.from_messages([system_message, human_message])
+
+
+class ClaudePromptEngine(PromptEngine):
+    def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
+        prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
+        prompt = prompt_path.read_text()
+        return PromptTemplate.from_template(f"Human: {prompt}\n\nAssistant: ")
+
+
+class TitanPromptEngine(PromptEngine):
+    def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
+        prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
+        prompt = prompt_path.read_text()
+        return PromptTemplate.from_template(f"User: {prompt}\n\nAssistant: ")
+
+
+class Llama2PromptEngine(PromptEngine):
+    def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
+        system_prompt_path = template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME
+        system_prompt = system_prompt_path.read_text()
+
+        human_prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
+        human_prompt = human_prompt_path.read_text()
+
+        return PromptTemplate.from_template(
+            f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{human_prompt} [/INST]"
+        )
+
+
+class Llama3PromptEngine(PromptEngine):
+    # see https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3
+    # /#special-tokens-used-with-meta-llama-3
+    def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
+        system_prompt_path = template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME
+        system_prompt = system_prompt_path.read_text()
+
+        human_prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
+        human_prompt = human_prompt_path.read_text()
+
+        return PromptTemplate.from_template(
+            f"<|begin_of_text|>"
+            f"<|start_header_id|>"
+            f"system"
+            f"<|end_header_id|>"
+            f"\n\n{system_prompt}"
+            f"<|eot_id|>"
+            f"<|start_header_id|>"
+            f"user"
+            f"<|end_header_id|>"
+            f"\n\n{human_prompt}"
+            f"<|eot_id|>"
+            f"<|start_header_id|>"
+            f"assistant"
+            f"<|end_header_id|>"
+            f"\n\n"
+        )
+
+
+class CoherePromptEngine(PromptEngine):
+    # see https://docs.cohere.com/docs/prompting-command-r
+    def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
+        system_prompt_path = template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME
+        system_prompt = system_prompt_path.read_text()
+
+        human_prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
+        human_prompt = human_prompt_path.read_text()

+        return PromptTemplate.from_template(
+            f"<BOS_TOKEN>"
+            f"<|START_OF_TURN_TOKEN|>"
+            f"<|SYSTEM_TOKEN|>"
+            f"{system_prompt}"
+            f"<|END_OF_TURN_TOKEN|>"
+            f"<|START_OF_TURN_TOKEN|>"
+            f"<|USER_TOKEN|>"
+            f"{human_prompt}"
+            f"<|END_OF_TURN_TOKEN|>"
+        )
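To make the model-specific wrapping concrete, here is a small illustration of the Claude- and Llama 2-style templates built above, using plain str.format in place of langchain's PromptTemplate. The system/human template text and the substituted values are invented for the example; the real templates live under janus/prompts/templates, and SOURCE_LANGUAGE, TARGET_LANGUAGE, and SOURCE_CODE are the prompt variables the engine fills in.

# Invented stand-ins for a template directory's system.txt and human.txt.
system_prompt = "You are an expert in {SOURCE_LANGUAGE} and {TARGET_LANGUAGE}."
human_prompt = "Translate the following {SOURCE_LANGUAGE} code:\n{SOURCE_CODE}"

# ClaudePromptEngine-style wrapping: a single human turn, no system segment.
claude_template = f"Human: {human_prompt}\n\nAssistant: "

# Llama2PromptEngine-style wrapping: system prompt inside <<SYS>> tags.
llama2_template = (
    f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{human_prompt} [/INST]"
)

values = dict(
    SOURCE_LANGUAGE="mumps",
    TARGET_LANGUAGE="python",
    SOURCE_CODE='WRITE "HELLO"',
)
print(claude_template.format(**values))
print(llama2_template.format(**values))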