janus-llm 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

Files changed (74)
  1. janus/__init__.py +9 -1
  2. janus/__main__.py +4 -0
  3. janus/_tests/test_cli.py +128 -0
  4. janus/_tests/test_translate.py +49 -7
  5. janus/cli.py +530 -46
  6. janus/converter.py +50 -19
  7. janus/embedding/_tests/test_collections.py +2 -8
  8. janus/embedding/_tests/test_database.py +32 -0
  9. janus/embedding/_tests/test_vectorize.py +9 -4
  10. janus/embedding/collections.py +49 -6
  11. janus/embedding/embedding_models_info.py +120 -0
  12. janus/embedding/vectorize.py +53 -62
  13. janus/language/_tests/__init__.py +0 -0
  14. janus/language/_tests/test_combine.py +62 -0
  15. janus/language/_tests/test_splitter.py +16 -0
  16. janus/language/binary/_tests/test_binary.py +16 -1
  17. janus/language/binary/binary.py +10 -3
  18. janus/language/block.py +31 -30
  19. janus/language/combine.py +26 -34
  20. janus/language/mumps/_tests/test_mumps.py +2 -2
  21. janus/language/mumps/mumps.py +93 -9
  22. janus/language/naive/__init__.py +4 -0
  23. janus/language/naive/basic_splitter.py +14 -0
  24. janus/language/naive/chunk_splitter.py +26 -0
  25. janus/language/naive/registry.py +13 -0
  26. janus/language/naive/simple_ast.py +18 -0
  27. janus/language/naive/tag_splitter.py +61 -0
  28. janus/language/splitter.py +168 -74
  29. janus/language/treesitter/_tests/test_treesitter.py +9 -6
  30. janus/language/treesitter/treesitter.py +37 -13
  31. janus/llm/model_callbacks.py +177 -0
  32. janus/llm/models_info.py +134 -70
  33. janus/metrics/__init__.py +8 -0
  34. janus/metrics/_tests/__init__.py +0 -0
  35. janus/metrics/_tests/reference.py +2 -0
  36. janus/metrics/_tests/target.py +2 -0
  37. janus/metrics/_tests/test_bleu.py +56 -0
  38. janus/metrics/_tests/test_chrf.py +67 -0
  39. janus/metrics/_tests/test_file_pairing.py +59 -0
  40. janus/metrics/_tests/test_llm.py +91 -0
  41. janus/metrics/_tests/test_reading.py +28 -0
  42. janus/metrics/_tests/test_rouge_score.py +65 -0
  43. janus/metrics/_tests/test_similarity_score.py +23 -0
  44. janus/metrics/_tests/test_treesitter_metrics.py +110 -0
  45. janus/metrics/bleu.py +66 -0
  46. janus/metrics/chrf.py +55 -0
  47. janus/metrics/cli.py +7 -0
  48. janus/metrics/complexity_metrics.py +208 -0
  49. janus/metrics/file_pairing.py +113 -0
  50. janus/metrics/llm_metrics.py +202 -0
  51. janus/metrics/metric.py +466 -0
  52. janus/metrics/reading.py +70 -0
  53. janus/metrics/rouge_score.py +96 -0
  54. janus/metrics/similarity.py +53 -0
  55. janus/metrics/splitting.py +38 -0
  56. janus/parsers/_tests/__init__.py +0 -0
  57. janus/parsers/_tests/test_code_parser.py +32 -0
  58. janus/parsers/code_parser.py +24 -253
  59. janus/parsers/doc_parser.py +169 -0
  60. janus/parsers/eval_parser.py +80 -0
  61. janus/parsers/reqs_parser.py +72 -0
  62. janus/prompts/prompt.py +103 -30
  63. janus/translate.py +636 -111
  64. janus/utils/_tests/__init__.py +0 -0
  65. janus/utils/_tests/test_logger.py +67 -0
  66. janus/utils/_tests/test_progress.py +20 -0
  67. janus/utils/enums.py +56 -3
  68. janus/utils/progress.py +56 -0
  69. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/METADATA +23 -10
  70. janus_llm-2.0.0.dist-info/RECORD +94 -0
  71. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/WHEEL +1 -1
  72. janus_llm-1.0.0.dist-info/RECORD +0 -48
  73. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/LICENSE +0 -0
  74. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/entry_points.txt +0 -0
janus/metrics/file_pairing.py
@@ -0,0 +1,113 @@
+ from typing import Any, Callable
+
+ from ..language.binary import BinarySplitter
+ from ..language.mumps import MumpsSplitter
+ from ..language.node import NodeType
+ from ..language.treesitter import TreeSitterSplitter
+ from ..utils.enums import CUSTOM_SPLITTERS
+
+ FILE_PAIRING_METHODS: dict[str, Callable[[str, str], list[tuple[str, str]]]] = {}
+
+
+ def register_pairing_method(name: None | str = None) -> Callable[[Callable], Callable]:
+     """Registers a pairing method for pairing strings between files
+
+     Arguments:
+         name: The name of the pairing method. If None, the function name is used.
+         help: The help text for the pairing method.
+
+     Returns:
+         The decorator function.
+     """
+
+     def decorator(f: Callable[[str, str], list[tuple[str, str]]]):
+         if name is None:
+             pairing_name = f.__name__
+         else:
+             pairing_name = name
+         FILE_PAIRING_METHODS[pairing_name] = f
+         return f
+
+     return decorator
+
+
+ @register_pairing_method(name="file")
+ def pair_by_file(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the entire contents of a file together
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     return [(target, reference)]
+
+
+ @register_pairing_method(name="line")
+ def pair_by_line(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the contents of a file together by line
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     return list(zip(target.split("\n"), reference.split("\n")))
+
+
+ @register_pairing_method(name="line-comment")
+ def pair_by_line_comment(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the comments of a file together by line
+
+     **WARNING**: Do not use, as this method is extremely brittle.
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     splitter_kwargs = dict(
+         max_tokens=kwargs["token_limit"] // 2.5,
+         model=kwargs["llm"],
+         protected_node_types=(NodeType("comment"),),
+         prune_node_types=tuple(),
+     )
+     if kwargs["target_file"] is None or kwargs["reference_file"] is None:
+         raise ValueError("Error: must provide file for pair by line comment")
+     if kwargs["lang"] is None:
+         raise ValueError("Error: must provide language for pair by line comment")
+     if kwargs["lang"] in CUSTOM_SPLITTERS:
+         if kwargs["lang"] == "mumps":
+             splitter = MumpsSplitter(**splitter_kwargs)
+         elif kwargs["lang"] == "binary":
+             splitter = BinarySplitter(**splitter_kwargs)
+     else:
+         splitter = TreeSitterSplitter(language=kwargs["lang"], **splitter_kwargs)
+     target_tree = splitter.split(kwargs["target_file"])
+     reference_tree = splitter.split(kwargs["reference_file"])
+     pairs = []
+
+     def _parse_pairs(node1, node2, pairs):
+         for c1, c2 in zip(node1.children, node2.children):
+             if c1.node_type == "comment" and c2.node_type == "comment":
+                 pairs.append((c1.complete_text, c2.complete_text))
+             else:
+                 _parse_pairs(c1, c2, pairs)
+
+     _parse_pairs(target_tree, reference_tree, pairs)
+     return pairs
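
The new file_pairing module is organized around a simple registry: register_pairing_method stores each decorated function in FILE_PAIRING_METHODS under its chosen name, and the metrics machinery later looks pairing methods up by that name. The sketch below illustrates that pattern with a hypothetical paragraph-based method and a direct registry lookup; it is an assumed usage example, not code from the package, and it presumes janus-llm 2.0.0 is installed so janus.metrics.file_pairing is importable.

# Hypothetical usage sketch: register a custom pairing method and call it
# through the registry, mirroring the built-in "file" and "line" methods above.
from janus.metrics.file_pairing import FILE_PAIRING_METHODS, register_pairing_method


@register_pairing_method(name="paragraph")
def pair_by_paragraph(target: str, reference: str, **kwargs) -> list[tuple[str, str]]:
    # Pair blank-line-separated paragraphs positionally, as pair_by_line does for lines.
    return list(zip(target.split("\n\n"), reference.split("\n\n")))


# The decorator has added the function to the registry under its given name.
pairs = FILE_PAIRING_METHODS["paragraph"]("t1\n\nt2", "r1\n\nr2")
print(pairs)  # [('t1', 'r1'), ('t2', 'r2')]
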
janus/metrics/llm_metrics.py
@@ -0,0 +1,202 @@
+ from pathlib import Path
+ from typing import Any
+
+ import click
+ import typer
+ from langchain_core.exceptions import OutputParserException
+ from langchain_core.output_parsers import BaseOutputParser, JsonOutputParser
+ from langchain_core.prompts import PromptTemplate
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ from typing_extensions import Annotated
+
+ from .metric import metric
+
+
+ class LLMMetricOutput(BaseModel):
+     """The output of an LLM evaluation metric."""
+
+     thought: str = Field(
+         ...,
+         description=(
+             "The thought process that you took to reach your value determination."
+         ),
+     )
+     value: str | float | int = Field(
+         ..., description="The value of the metric described in the prompt."
+     )
+
+
+ def load_prompt(path: Path, language: str, parser: BaseOutputParser) -> PromptTemplate:
+     """Load a default prompt from a file.
+
+     Arguments:
+         path: The path to the file.
+         language: The language of the prompt.
+         pydantic_model: The Pydantic model to use for parsing the output.
+
+     Returns:
+         The prompt text.
+     """
+     if not path.exists():
+         raise FileNotFoundError(f"File not found: {path}")
+     prompt = PromptTemplate.from_template(
+         path.read_text(),
+         template_format="f-string",
+         partial_variables={
+             "language": language,
+             "format_instructions": parser.get_format_instructions(),
+         },
+     )
+     return prompt
+
+
+ def evaluate(
+     target: str,
+     language: str,
+     model: str,
+     prompt_path: Path,
+     reference: str | None = None,
+ ):
+     """Calculate the LLM self evaluation score.
+
+     Arguments:
+         target: The target text.
+         language: The language that the target code is written in.
+         prompt_path: The filepath of the prompt text
+         reference: The reference text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     parser = JsonOutputParser(pydantic_object=LLMMetricOutput)
+     prompt = load_prompt(prompt_path, language, parser)
+     chain = prompt | model | parser
+     try:
+         output = (
+             chain.invoke(dict(target=target, reference=reference))
+             if reference
+             else chain.invoke(dict(target=target))
+         )
+         return output["value"]
+     except OutputParserException:
+         return False
+
+
+ @metric(use_reference=False, name="llm", help="LLM self-evaluation on a target file")
+ def llm_evaluate_option(
+     target: str,
+     metric: Annotated[
+         str,
+         typer.Option(
+             "--metric",
+             "-m",
+             help=("The pre-defined metric to use for evaluation."),
+             click_type=click.Choice(
+                 [
+                     "quality",
+                     "clarity",
+                     "faithfulness",
+                     "completeness",
+                     "hallucination",
+                     "readability",
+                     "usefulness",
+                 ]
+             ),
+         ),
+     ] = "quality",
+     prompt: Annotated[
+         str,
+         None,
+         typer.Option(
+             "--prompt",
+             "-P",
+             help=("A custom prompt in a .txt file to use for evaluation."),
+         ),
+     ] = None,
+     num_eval: Annotated[
+         int,
+         typer.Option(
+             "-n",
+             "--num-eval",
+             help="Number of times to run the evaluation",
+         ),
+     ] = 1,
+     **kwargs,
+ ) -> Any:
+     """CLI option to calculate the LLM self evaluation score.
+
+     Arguments:
+         target: The target text.
+         reference: The reference text.
+         metric: The pre-defined metric to use for evaluation.
+         prompt: The prompt text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     prompt_path: Path = (
+         Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
+     )
+     if num_eval == 1:
+         return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
+     else:
+         return [
+             evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
+             for _ in range(num_eval)
+         ]
+
+
+ @metric(name="llm-ref", help="LLM self-evaluation on a target file and a reference file")
+ def llm_evaluate_ref_option(
+     target: str,
+     reference: str,
+     metric: Annotated[
+         str,
+         typer.Option(
+             "--metric",
+             "-m",
+             help=("The pre-defined metric to use for evaluation."),
+             click_type=click.Choice(["faithfulness"]),
+         ),
+     ] = "faithfulness",
+     prompt: Annotated[
+         str,
+         None,
+         typer.Option(
+             "--prompt",
+             "-P",
+             help=("A custom prompt in a .txt file to use for evaluation."),
+         ),
+     ] = None,
+     num_eval: Annotated[
+         int,
+         typer.Option(
+             "-n",
+             "--num-eval",
+             help="Number of times to run evaluation for pair",
+         ),
+     ] = 1,
+     **kwargs,
+ ) -> Any:
+     """CLI option to calculate the LLM self evaluation score, for evaluations which
+     require a reference file (e.g. faithfulness)
+
+     Arguments:
+         target: The target text.
+         reference: The reference text.
+         metric: The pre-defined metric to use for evaluation.
+         prompt: The prompt text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     prompt_path: Path = (
+         Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
+     )
+     if num_eval == 1:
+         return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
+     else:
+         return [
+             evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
+             for _ in range(num_eval)
+         ]
+ ]