PyPI - janus-llm - Versions diffs - 4.3.5__py3-none-any.whl → 4.4.5__py3-none-any.whl - Mend

janus-llm 4.3.5__py3-none-any.whl → 4.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

janus/__init__.py +1 -1
janus/cli/aggregate.py +2 -2
janus/cli/cli.py +6 -0
janus/cli/constants.py +6 -0
janus/cli/diagram.py +36 -7
janus/cli/document.py +10 -1
janus/cli/llm.py +7 -3
janus/cli/partition.py +10 -1
janus/cli/pipeline.py +123 -0
janus/cli/self_eval.py +1 -3
janus/cli/translate.py +10 -1
janus/converter/_tests/test_translate.py +5 -5
janus/converter/chain.py +180 -0
janus/converter/converter.py +333 -78
janus/converter/diagram.py +8 -6
janus/converter/document.py +7 -3
janus/converter/evaluate.py +140 -148
janus/converter/partition.py +2 -10
janus/converter/requirements.py +4 -40
janus/converter/translate.py +2 -58
janus/language/block.py +31 -2
janus/metrics/metric.py +47 -124
janus/parsers/reqs_parser.py +3 -3
{janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/METADATA +12 -12
{janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/RECORD +28 -28
janus/metrics/_tests/test_llm.py +0 -90
janus/metrics/llm_metrics.py +0 -202
{janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
{janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +0 -0
{janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/entry_points.txt +0 -0

janus/__init__.py CHANGED Viewed

@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
 from janus.converter.translate import Translator
 from janus.metrics import *  # noqa: F403
-__version__ = "4.3.5"
+__version__ = "4.4.5"
 # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
 warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)

janus/cli/aggregate.py CHANGED Viewed

@@ -33,7 +33,7 @@ def aggregate(
     output_dir: Annotated[
         Path,
         typer.Option(
-            "--output-dir", "-o", help="The directory to store the translated code in."
+            "--output", "-o", help="The directory to store the translated code in."
         ),
     ],
     llm_name: Annotated[
@@ -130,6 +130,6 @@ def aggregate(
         db_path=db_loc,
         db_config=collections_config,
         splitter_type=splitter_type,
-        prompt_template="basic_aggregation",
+        prompt_templates="basic_aggregation",
     )
     aggregator.translate(input_dir, output_dir, failure_dir, overwrite, collection)

janus/cli/cli.py CHANGED Viewed

@@ -10,6 +10,7 @@ from janus.cli.document import document
 from janus.cli.embedding import embedding
 from janus.cli.llm import llm
 from janus.cli.partition import partition
+from janus.cli.pipeline import pipeline
 from janus.cli.self_eval import llm_self_eval
 from janus.cli.translate import translate
 from janus.metrics.cli import evaluate
@@ -101,6 +102,11 @@ translate = app.command(
     no_args_is_help=True,
 )(translate)
+pipeline = app.command(
+    help="Run a janus pipeline",
+    no_args_is_help=True,
+)(pipeline)
 app.add_typer(db, name="db")
 app.add_typer(llm, name="llm")
 app.add_typer(evaluate, name="evaluate")

janus/cli/constants.py CHANGED Viewed

@@ -33,6 +33,12 @@ REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
 )
 REFINERS = {r.__name__: r for r in REFINER_TYPES}
+CONVERTER_TYPES = get_subclasses(janus.converter.converter.Converter).union(
+    {janus.converter.converter.Converter}
+)
+CONVERTERS = {c.__name__: c for c in CONVERTER_TYPES}
 def get_collections_config():
     if collections_config_file.exists():

janus/cli/diagram.py CHANGED Viewed

@@ -32,7 +32,7 @@ def diagram(
     output_dir: Annotated[
         Path,
         typer.Option(
-            "--output-dir", "-o", help="The directory to store the translated code in."
+            "--output", "-o", help="The directory to store the translated code in."
         ),
     ],
     llm_name: Annotated[
@@ -112,7 +112,7 @@ def diagram(
                 refinement chain",
             click_type=click.Choice(list(REFINERS.keys())),
         ),
-    ] = ["JanusRefiner"],
+    ] = ["CodeFormatRefiner"],
     retriever_type: Annotated[
         str,
         typer.Option(
@@ -122,6 +122,24 @@ def diagram(
             click_type=click.Choice(["active_usings", "language_docs"]),
         ),
     ] = None,
+    extract_variables: Annotated[
+        bool,
+        typer.Option(
+            "-ev",
+            "--extract-variables",
+            help="Present when diagram generator should \
+                extract variables before producing diagram",
+        ),
+    ] = False,
+    use_janus_inputs: Annotated[
+        bool,
+        typer.Option(
+            "-j",
+            "--use-janus-inputs",
+            help="Present when diagram generator should be\
+                  be using janus files as inputs",
+        ),
+    ] = False,
 ):
     from janus.cli.constants import db_loc, get_collections_config
     from janus.converter.diagram import DiagramGenerator
@@ -141,6 +159,8 @@ def diagram(
         retriever_type=retriever_type,
         diagram_type=diagram_type,
         add_documentation=add_documentation,
+        extract_variables=extract_variables,
+        use_janus_inputs=use_janus_inputs,
     )
     diagram_generator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
@@ -170,9 +190,18 @@ def render(
         if not output_file.parent.exists():
             output_file.parent.mkdir()
-        text = data["output"].replace("\\n", "\n").strip()
-        output_file.write_text(text)
+        def _render(obj, ind=0):
+            for o in obj["outputs"]:
+                if isinstance(o, dict):
+                    ind += _render(o, ind)
+                else:
+                    outfile_new = output_file.with_stem(f"{output_file.stem}_{ind}")
+                    text = o.replace("\\n", "\n").strip()
+                    outfile_new.write_text(text)
+                    jar_path = homedir / ".janus/lib/plantuml.jar"
+                    subprocess.run(["java", "-jar", jar_path, outfile_new])  # nosec
+                    outfile_new.unlink()
+                    ind += 1
+            return ind
-        jar_path = homedir / ".janus/lib/plantuml.jar"
-        subprocess.run(["java", "-jar", jar_path, output_file])  # nosec
-        output_file.unlink()
+        _render(data)

janus/cli/document.py CHANGED Viewed

@@ -32,7 +32,7 @@ def document(
     output_dir: Annotated[
         Path,
         typer.Option(
-            "--output-dir", "-o", help="The directory to store the translated code in."
+            "--output", "-o", help="The directory to store the translated code in."
         ),
     ],
     llm_name: Annotated[
@@ -142,6 +142,14 @@ def document(
             "If unspecificed, model's default max will be used.",
         ),
     ] = None,
+    use_janus_inputs: Annotated[
+        bool,
+        typer.Option(
+            "-j",
+            "--use-janus-inputs",
+            help="Present if converter should use janus files as inputs",
+        ),
+    ] = False,
 ):
     from janus.cli.constants import db_loc, get_collections_config
     from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
@@ -161,6 +169,7 @@ def document(
         splitter_type=splitter_type,
         refiner_types=refiner_types,
         retriever_type=retriever_type,
+        use_janus_inputs=use_janus_inputs,
     )
     if doc_mode == "cloze":
         documenter = ClozeDocumenter(comments_per_request=comments_per_request, **kwargs)

janus/cli/llm.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import click
 import typer
+from rich import print
 from typing_extensions import Annotated
 from janus.llm.models_info import MODEL_TYPE_CONSTRUCTORS
@@ -45,7 +46,10 @@ def llm_add(
     if model_type == "HuggingFace":
         url = typer.prompt("Enter the model's URL")
         max_tokens = typer.prompt(
-            "Enter the model's maximum tokens", default=4096, type=int
+            "Enter the model's token limit", default=65536, type=int
+        )
+        max_tokens = typer.prompt(
+            "Enter the model's max output tokens", default=8192, type=int
         )
         in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
         out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
@@ -61,6 +65,7 @@ def llm_add(
         )
         cfg = {
             "model_type": model_type,
+            "model_id": "gpt-4o",  # This is a placeholder to use the Azure PromptEngine
             "model_args": params,
             "token_limit": max_tokens,
             "model_cost": {"input": in_cost, "output": out_cost},
@@ -172,8 +177,7 @@ def llm_ls(
 ):
     import json
-    from janus.cli.constants import MODEL_CONFIG_DIR
-    from janus.llm.models_info import MODEL_TYPES
+    from janus.llm.models_info import MODEL_CONFIG_DIR, MODEL_TYPES
     print("\n[green]User-configured models[/green]:")
     for model_cfg in MODEL_CONFIG_DIR.glob("*.json"):

janus/cli/partition.py CHANGED Viewed

@@ -31,7 +31,7 @@ def partition(
     output_dir: Annotated[
         Path,
         typer.Option(
-            "--output-dir", "-o", help="The directory to store the partitioned code in."
+            "--output", "-o", help="The directory to store the partitioned code in."
         ),
     ],
     llm_name: Annotated[
@@ -106,6 +106,14 @@ def partition(
             help="The limit on the number of tokens per partition.",
         ),
     ] = 8192,
+    use_janus_inputs: Annotated[
+        bool,
+        typer.Option(
+            "-j",
+            "--use-janus-inputs",
+            help="Present if converter should use janus inputs",
+        ),
+    ] = False,
 ):
     from janus.converter.partition import Partitioner
@@ -120,6 +128,7 @@ def partition(
         splitter_type=splitter_type,
         refiner_types=refiner_types,
         partition_token_limit=partition_token_limit,
+        use_janus_inputs=use_janus_inputs,
     )
     partitioner = Partitioner(**kwargs)
     partitioner.translate(input_dir, output_dir, failure_dir, overwrite)

janus/cli/pipeline.py ADDED Viewed

@@ -0,0 +1,123 @@
+import json
+from pathlib import Path
+from typing import Optional
+import click
+import typer
+from typing_extensions import Annotated
+from janus.cli.constants import CONVERTERS
+from janus.converter.chain import ConverterChain
+from janus.utils.enums import LANGUAGES
+def instiantiate(x):
+    if isinstance(x, dict):
+        if "type" in x:
+            if "args" not in x:
+                x["args"] = []
+            x["args"] = [instiantiate(a) for a in x["args"]]
+            if "kwargs" not in x:
+                x["kwargs"] = {}
+            x["kwargs"] = {k: instiantiate(x["kwargs"][k]) for k in x["kwargs"]}
+            if x["type"] not in CONVERTERS:
+                raise ValueError(f"Error: {x['type']} is not a Converter")
+            return CONVERTERS[x["type"]](*x["args"], **x["kwargs"])
+        else:
+            return {k: instiantiate(x[k]) for k in x}
+    elif isinstance(x, list):
+        return [instiantiate(a) for a in x]
+    else:
+        return x
+def instiantiate_pipeline(
+    pipeline: list[dict],
+    language: str = "text",
+    model: str = "gpt-4o",
+    use_janus_inputs: None | bool = None,
+):
+    if "kwargs" not in pipeline[0]:
+        pipeline[0]["kwargs"] = {}
+    pipeline[0]["kwargs"].update(source_language=language, model=model)
+    if use_janus_inputs is not None:
+        pipeline[0]["kwargs"].update(janus_inputs=use_janus_inputs)
+    print(pipeline[0])
+    converters = [instiantiate(pipeline[0])]
+    for p in pipeline[1:]:
+        p["kwargs"].update(source_language=converters[-1].target_language, model=model)
+        converters.append(instiantiate(p))
+    return ConverterChain(*converters)
+def pipeline(
+    pipeline_file: Annotated[
+        Path, typer.Option("-p", "--pipeline", help="Name of pipeline file to use")
+    ],
+    input_dir: Annotated[
+        Path,
+        typer.Option(
+            "--input",
+            "-i",
+            help="The directory containing the source code to be translated. "
+            "The files should all be in one flat directory.",
+        ),
+    ],
+    language: Annotated[
+        str,
+        typer.Option(
+            "--language",
+            "-l",
+            help="The language of the source code.",
+            click_type=click.Choice(sorted(LANGUAGES)),
+        ),
+    ],
+    output_dir: Annotated[
+        Path,
+        typer.Option(
+            "--output", "-o", help="The directory to store the translated code in."
+        ),
+    ],
+    llm_name: Annotated[
+        str,
+        typer.Option(
+            "--llm",
+            "-L",
+            help="The custom name of the model set with 'janus llm add'.",
+        ),
+    ],
+    failure_dir: Annotated[
+        Optional[Path],
+        typer.Option(
+            "--failure-directory",
+            "-f",
+            help="The directory to store failure files during documentation",
+        ),
+    ] = None,
+    overwrite: Annotated[
+        bool,
+        typer.Option(
+            "--overwrite/--preserve",
+            help="Whether to overwrite existing files in the output directory",
+        ),
+    ] = False,
+    use_janus_inputs: Annotated[
+        Optional[bool],
+        typer.Option(
+            "-j",
+            "--use-janus-inputs",
+            help="Present if converter chain should use janus input files",
+        ),
+    ] = None,
+):
+    with open(pipeline_file, "r") as f:
+        json_obj = json.load(f)
+    pipeline = instiantiate_pipeline(
+        json_obj, language=language, model=llm_name, use_janus_inputs=use_janus_inputs
+    )
+    pipeline.translate(
+        input_directory=input_dir,
+        output_directory=output_dir,
+        failure_directory=failure_dir,
+        overwrite=overwrite,
+    )

janus/cli/self_eval.py CHANGED Viewed

@@ -31,9 +31,7 @@ def llm_self_eval(
     ],
     output_dir: Annotated[
         Path,
-        typer.Option(
-            "--output-dir", "-o", help="The directory to store the evaluations in."
-        ),
+        typer.Option("--output", "-o", help="The directory to store the evaluations in."),
     ],
     failure_dir: Annotated[
         Optional[Path],

janus/cli/translate.py CHANGED Viewed

@@ -148,6 +148,14 @@ def translate(
             "If unspecificed, model's default max will be used.",
         ),
     ] = None,
+    use_janus_inputs: Annotated[
+        bool,
+        typer.Option(
+            "-j",
+            "--use-janus-inputs",
+            help="Prsent if translator should use janus files as inputs",
+        ),
+    ] = False,
 ):
     from janus.cli.constants import db_loc, get_collections_config
     from janus.converter.translate import Translator
@@ -173,11 +181,12 @@ def translate(
         target_version=target_version,
         max_prompts=max_prompts,
         max_tokens=max_tokens,
-        prompt_template=prompt_template,
+        prompt_templates=prompt_template,
         db_path=db_loc,
         db_config=collections_config,
         splitter_type=splitter_type,
         refiner_types=refiner_types,
         retriever_type=retriever_type,
+        use_janus_inputs=use_janus_inputs,
     )
     translator.translate(input_dir, output_dir, failure_dir, overwrite, collection)

janus/converter/_tests/test_translate.py CHANGED Viewed

@@ -59,14 +59,14 @@ class TestTranslator(unittest.TestCase):
         self.req_translator = RequirementsDocumenter(
             model="gpt-4o-mini",
             source_language="fortran",
-            prompt_template="requirements",
+            prompt_templates="requirements",
         )
     @pytest.mark.translate
     def test_translate(self):
         """Test translate method."""
         # Delete a file if it's already there
-        python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.py"
+        python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.json"
         python_file.unlink(missing_ok=True)
         python_file.parent.rmdir() if python_file.parent.is_dir() else None
         self.translator.translate(self.test_file.parent, self.test_file.parent / "python")
@@ -82,7 +82,7 @@ class TestTranslator(unittest.TestCase):
         self.assertRaises(
             ValueError, self.translator.set_source_language, "scribbledy-doop"
         )
-        self.translator.set_prompt("pish posh")
+        self.translator.set_prompts(["pish posh"])
         self.assertRaises(ValueError, self.translator._load_parameters)
@@ -149,10 +149,10 @@ def test_language_combinations(
     translator.set_model("gpt-4o")
     translator.set_source_language(source_language)
     translator.set_target_language(expected_target_language, expected_target_version)
-    translator.set_prompt(prompt_template)
+    translator.set_prompts(prompt_template)
     translator._load_parameters()
     assert translator._target_language == expected_target_language  # nosec
     assert translator._target_version == expected_target_version  # nosec
     assert translator._splitter.language == source_language  # nosec
     assert translator._splitter.model.model_name == "gpt-4o"  # nosec
-    assert translator._prompt_template_name == prompt_template  # nosec
+    assert translator._prompt_template_names == [prompt_template]  # nosec

janus/converter/chain.py ADDED Viewed

@@ -0,0 +1,180 @@
+from pathlib import Path
+from janus.converter.converter import Converter
+from janus.language.block import CodeBlock, TranslatedCodeBlock
+from janus.utils.logger import create_logger
+log = create_logger(__name__)
+class ConverterChain(Converter):
+    """
+    Class for representing multiple converters chained together
+    """
+    def __init__(self, *args, **kwargs) -> None:
+        if len(args) == 0:
+            raise ValueError("Error: Converter chain must be passed at least 1 converter")
+        for converter in args:
+            if not isinstance(converter, Converter):
+                raise ValueError(f"Error: unrecognized type: {type(converter)}")
+        self._converters = args
+        kwargs.update(
+            source_language=self._converters[0].source_language,
+            target_language=self._converters[-1]._target_language,
+            target_version=self._converters[-1]._target_version,
+            use_janus_inputs=self._converters[0]._use_janus_inputs,
+        )
+        super().__init__(**kwargs)
+    def _run_converters(
+        self, translated_code_block, name: str, failure_path: Path | None = None
+    ):
+        for i, converter in enumerate(self._converters[1:]):
+            if not translated_code_block.translated:
+                log.info(
+                    f"Error: chain failed to translate at step {i}:"
+                    f"{self._converters[i].__class__.__name__}"
+                )
+                break
+            if converter._use_janus_inputs:
+                janus_obj = self._converters[i]._get_output_obj(translated_code_block)
+                translated_code_block = converter.translate_janus_obj(
+                    janus_obj, name, failure_path
+                )
+            else:
+                translated_code_block = converter.translate_block(
+                    translated_code_block.to_codeblock(), name, failure_path
+                )
+        if not translated_code_block.translated:
+            log.info(
+                f"Error: chain failed to translate at step {len(self._converters)-1}: "
+                f"{self._converters[-1].__class__.__name__}"
+            )
+        return translated_code_block
+    def translate_file(
+        self, file: Path, failure_path: Path | None = None
+    ) -> TranslatedCodeBlock:
+        """Translate a file using the chain of converters
+        Arguments:
+            file: The file to translate
+            failure_path: The path to write the failure file to
+        Returns:
+            The translated code block
+        """
+        filename = file.name
+        translated_code_block = self._converters[0].translate_file(file, failure_path)
+        translated_code_block = self._run_converters(
+            translated_code_block, filename, failure_path
+        )
+        return translated_code_block
+    def translate_text(
+        self, text: str, name: str, failure_path: Path | None = None
+    ) -> TranslatedCodeBlock:
+        """Translate a text using the chain of converters
+        Arguments:
+            text: The text to translate
+            name: The name of the file
+            failure_path: The path to write the failure file to
+        Returns:
+            The translated code block
+        """
+        translated_code_block = self._converters[0].translate_text(
+            text, name, failure_path
+        )
+        translated_code_block = self._run_converters(
+            translated_code_block, name, failure_path
+        )
+        return translated_code_block
+    def translate_block(
+        self,
+        input_block: CodeBlock | list[CodeBlock],
+        name: str,
+        failure_path: Path | None = None,
+    ) -> TranslatedCodeBlock:
+        """Translate a block of code using the chain of converters
+        Arguments:
+            input_block: The block of code to translate
+            name: The name of the file
+            failure_path: The path to write the failure file to
+        Returns:
+            The translated code block
+        """
+        translated_code_block = self._converters[0].translate_block(
+            input_block, name, failure_path
+        )
+        translated_code_block = self._run_converters(
+            translated_code_block, name, failure_path
+        )
+        return translated_code_block
+    def _get_output_obj(
+        self, block: TranslatedCodeBlock | list, combine_children: bool = True
+    ) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
+        output_obj = super()._get_output_obj(block, combine_children)
+        intermediate_outputs = []
+        for i, intermediate_out in enumerate(block.previous_generations):
+            if isinstance(intermediate_out, TranslatedCodeBlock):
+                intermediate_outputs.append(
+                    self._converters[i]._get_output_obj(intermediate_out)
+                )
+            else:
+                intermediate_outputs.append(intermediate_out)
+        intermediate_outputs.append(self._converters[-1]._get_output_obj(block))
+        output_obj["intermediate_outputs"] = intermediate_outputs
+        metadata = output_obj["metadata"]
+        metadata["cost"] += sum(
+            b.cost if isinstance(b, TranslatedCodeBlock) else b["metadata"]["cost"]
+            for b in block.previous_generations
+        )
+        metadata["processing_time"] += sum(
+            (
+                b.processing_time
+                if isinstance(b, TranslatedCodeBlock)
+                else b["metadata"]["processing_time"]
+            )
+            for b in block.previous_generations
+        )
+        metadata["num_requests"] += sum(
+            (
+                b.total_num_requests
+                if isinstance(b, TranslatedCodeBlock)
+                else b["metadata"]["num_requests"]
+            )
+            for b in block.previous_generations
+        )
+        metadata["input_tokens"] += sum(
+            (
+                b.total_request_input_tokens
+                if isinstance(b, TranslatedCodeBlock)
+                else b["metadata"]["input_tokens"]
+            )
+            for b in block.previous_generations
+        )
+        metadata["output_tokens"] += sum(
+            (
+                b.total_request_output_tokens
+                if isinstance(b, TranslatedCodeBlock)
+                else b["metadata"]["output_tokens"]
+            )
+            for b in block.previous_generations
+        )
+        output_obj["metadata"] = metadata
+        if len(block.previous_generations) > 0:
+            b = block.previous_generations[0]
+            output_obj["input"] = (
+                (b.original.text or "")
+                if isinstance(b, TranslatedCodeBlock)
+                else b["input"]
+            )
+        return output_obj

janus-llm 4.3.5__py3-none-any.whl → 4.4.5__py3-none-any.whl

janus-llm 4.3.5__py3-none-any.whl → 4.4.5__py3-none-any.whl