PyPI - janus-llm - Versions diffs - 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl - Mend

janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl

Files changed (134) hide show

janus/__init__.py +1 -1
janus/__main__.py +1 -1
janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
janus/_tests/test_cli.py +3 -2
janus/cli/aggregate.py +135 -0
janus/cli/cli.py +111 -0
janus/cli/constants.py +43 -0
janus/cli/database.py +289 -0
janus/cli/diagram.py +178 -0
janus/cli/document.py +174 -0
janus/cli/embedding.py +122 -0
janus/cli/llm.py +187 -0
janus/cli/partition.py +125 -0
janus/cli/self_eval.py +149 -0
janus/cli/translate.py +183 -0
janus/converter/__init__.py +1 -1
janus/converter/_tests/test_translate.py +2 -0
janus/converter/converter.py +129 -92
janus/converter/document.py +21 -14
janus/converter/evaluate.py +237 -4
janus/converter/translate.py +3 -3
janus/embedding/collections.py +1 -1
janus/language/alc/_tests/alc.asm +3779 -0
janus/language/alc/_tests/test_alc.py +1 -1
janus/language/alc/alc.py +9 -4
janus/language/binary/_tests/hello.bin +0 -0
janus/language/block.py +47 -12
janus/language/file.py +1 -1
janus/language/mumps/_tests/mumps.m +235 -0
janus/language/splitter.py +31 -23
janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
janus/language/treesitter/_tests/languages/matlab.m +225 -0
janus/language/treesitter/treesitter.py +9 -1
janus/llm/models_info.py +26 -13
janus/metrics/_tests/asm_test_file.asm +10 -0
janus/metrics/_tests/mumps_test_file.m +6 -0
janus/metrics/_tests/test_treesitter_metrics.py +1 -1
janus/metrics/prompts/clarity.txt +8 -0
janus/metrics/prompts/completeness.txt +16 -0
janus/metrics/prompts/faithfulness.txt +10 -0
janus/metrics/prompts/hallucination.txt +16 -0
janus/metrics/prompts/quality.txt +8 -0
janus/metrics/prompts/readability.txt +16 -0
janus/metrics/prompts/usefulness.txt +16 -0
janus/parsers/code_parser.py +4 -4
janus/parsers/doc_parser.py +12 -9
janus/parsers/eval_parsers/incose_parser.py +134 -0
janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
janus/parsers/parser.py +7 -0
janus/parsers/partition_parser.py +47 -13
janus/parsers/reqs_parser.py +8 -5
janus/parsers/uml.py +5 -4
janus/prompts/prompt.py +2 -2
janus/prompts/templates/README.md +30 -0
janus/prompts/templates/basic_aggregation/human.txt +6 -0
janus/prompts/templates/basic_aggregation/system.txt +1 -0
janus/prompts/templates/basic_refinement/human.txt +14 -0
janus/prompts/templates/basic_refinement/system.txt +1 -0
janus/prompts/templates/diagram/human.txt +9 -0
janus/prompts/templates/diagram/system.txt +1 -0
janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
janus/prompts/templates/document/human.txt +10 -0
janus/prompts/templates/document/system.txt +1 -0
janus/prompts/templates/document_cloze/human.txt +11 -0
janus/prompts/templates/document_cloze/system.txt +1 -0
janus/prompts/templates/document_cloze/variables.json +4 -0
janus/prompts/templates/document_cloze/variables_asm.json +4 -0
janus/prompts/templates/document_inline/human.txt +13 -0
janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
janus/prompts/templates/multidocument/human.txt +15 -0
janus/prompts/templates/multidocument/system.txt +1 -0
janus/prompts/templates/partition/human.txt +22 -0
janus/prompts/templates/partition/system.txt +1 -0
janus/prompts/templates/partition/variables.json +4 -0
janus/prompts/templates/pseudocode/human.txt +7 -0
janus/prompts/templates/pseudocode/system.txt +7 -0
janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
janus/prompts/templates/refinement/hallucination/human.txt +13 -0
janus/prompts/templates/refinement/hallucination/system.txt +1 -0
janus/prompts/templates/refinement/reflection/human.txt +15 -0
janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
janus/prompts/templates/refinement/reflection/system.txt +1 -0
janus/prompts/templates/refinement/revision/human.txt +16 -0
janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
janus/prompts/templates/refinement/revision/system.txt +1 -0
janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
janus/prompts/templates/requirements/human.txt +13 -0
janus/prompts/templates/requirements/system.txt +2 -0
janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
janus/prompts/templates/simple/human.txt +16 -0
janus/prompts/templates/simple/system.txt +3 -0
janus/refiners/format.py +49 -0
janus/refiners/refiner.py +143 -4
janus/utils/enums.py +140 -111
janus/utils/logger.py +2 -0
{janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/METADATA +7 -7
janus_llm-4.3.5.dist-info/RECORD +210 -0
{janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/WHEEL +1 -1
janus_llm-4.3.5.dist-info/entry_points.txt +3 -0
janus/cli.py +0 -1343
janus_llm-4.2.0.dist-info/RECORD +0 -113
janus_llm-4.2.0.dist-info/entry_points.txt +0 -3
{janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/LICENSE +0 -0

janus/cli/database.py ADDED Viewed

@@ -0,0 +1,289 @@
+from pathlib import Path
+from typing import Optional
+import typer
+from typing_extensions import Annotated
+from janus.cli.constants import db_loc, janus_dir
+# Typer sub-application that groups the database commands registered below
+# (init, status, ls, add, rm). Invoked with no arguments it shows its help text,
+# and both -h and --help are accepted as help flags.
+db = typer.Typer(
+    help="Database commands",
+    add_completion=False,
+    no_args_is_help=True,
+    context_settings={"help_option_names": ["-h", "--help"]},
+)
+@db.command("init", help="Connect to or create a database.")
+def db_init(
+    path: Annotated[
+        str, typer.Option("--path", "-p", help="The path to the database file.")
+    ] = str(janus_dir / "chroma.db"),
+    url: Annotated[
+        str,
+        typer.Option(
+            "--url",
+            "-u",
+            help="The URL of the database if the database is running externally.",
+        ),
+    ] = "",
+) -> None:
+    """Point the CLI at a Chroma database and remember that choice
+    A non-empty ``url`` takes precedence over ``path``. The chosen location is
+    written to ``db_file``, stored in this module's ``db_loc`` global and used to
+    construct the module-level ``embedding_db``.
+    Arguments:
+        path: Filesystem location of the database; made absolute before use
+        url: Address of an externally running database, or "" to use ``path``
+    """
+    import os
+    from rich import print
+    from janus.cli.constants import db_file
+    from janus.embedding.database import ChromaEmbeddingDatabase
+    global db_loc, embedding_db
+    use_url = url != ""
+    if use_url:
+        location = url
+        print(f"Pointing to Chroma DB at {location}")
+    else:
+        location = os.path.abspath(path)
+        print(f"Setting up Chroma DB at {location}")
+    # Persist the selection so that later invocations can pick it up.
+    with open(db_file, "w") as handle:
+        handle.write(location)
+    db_loc = location
+    embedding_db = ChromaEmbeddingDatabase(db_loc)
+@db.command("status", help="Print current database location.")
+def db_status():
+    """Report which database location this module currently has in ``db_loc``"""
+    from rich import print as rich_print
+    location_message = f"Chroma DB currently pointing to {db_loc}"
+    rich_print(location_message)
+@db.command(
+    "ls",
+    help="List the current database's collections. Or supply a collection name to list "
+    "information about its contents.",
+)
+def db_ls(
+    collection_name: Annotated[
+        Optional[str], typer.Argument(help="The name of the collection.")
+    ] = None,
+    peek: Annotated[
+        Optional[int],
+        typer.Option("--peek", "-p", help="Peek at N entries for a specific collection."),
+    ] = None,
+) -> None:
+    """List the current database's collections
+    Arguments:
+        collection_name: The collection to describe; when omitted, whatever
+            ``Collections.get(None)`` returns is listed
+        peek: Number of entries to preview; rejected unless a collection name
+            is also given
+    """
+    from rich import print
+    from janus.embedding.collections import Collections
+    from janus.embedding.database import ChromaEmbeddingDatabase
+    if peek is not None and collection_name is None:
+        print(
+            "\n[bold red]Cannot peek at all collections. Please specify a "
+            "collection by name.[/bold red]"
+        )
+        return
+    # Named ``database`` so the module-level ``db`` Typer app is not shadowed.
+    database = ChromaEmbeddingDatabase(db_loc)
+    collection_list = Collections(database).get(collection_name)
+    for collection in collection_list:
+        print(
+            f"\n[bold underline]Collection[/bold underline]: "
+            f"[bold salmon1]{collection.name}[/bold salmon1]"
+        )
+        print(f"  ID: {collection.id}")
+        print(f"  Metadata: {collection.metadata}")
+        print(f"  Tenant: [green]{collection.tenant}[/green]")
+        print(f"  Database: [green]{collection.database}[/green]")
+        print(f"  Length: {collection.count()}")
+        if peek:
+            entry = collection.peek(peek)
+            embeddings = entry.get("embeddings")
+            # Abbreviate the output to the first two components of the first
+            # embedding. Skip this when nothing came back (e.g. an empty
+            # collection), where the ``[0]`` lookup used to raise. ``list()``
+            # keeps the concatenation valid if the slice is not a plain list.
+            if embeddings is not None and len(embeddings) > 0:
+                entry["embeddings"] = list(embeddings[0][:2]) + ["..."]
+            if peek == 1:
+                print("  [bold]Peeking at first entry[/bold]:")
+            else:
+                print(f"  [bold]Peeking at first {peek} entries[/bold]:")
+            print(entry)
+        print()
+@db.command("add", help="Add a collection to the current database.")
+def db_add(
+    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
+    model_name: Annotated[str, typer.Argument(help="The name of the embedding model.")],
+    input_dir: Annotated[
+        str,
+        typer.Option(
+            "--input",
+            "-i",
+            help="The directory containing the source code to be added.",
+        ),
+    ] = "./",
+    input_lang: Annotated[
+        str, typer.Option("--language", "-l", help="The language of the source code.")
+    ] = "python",
+    max_tokens: Annotated[
+        int,
+        typer.Option(
+            "--max-tokens",
+            "-m",
+            help="The maximum number of tokens for each chunk of input source code.",
+        ),
+    ] = 4096,
+) -> None:
+    """Add a collection to the database
+    Files under ``input_dir`` whose suffix belongs to ``input_lang`` are split and
+    added to the collection. A summary is then printed and the vectorizer's
+    config is written to the collections config file.
+    Arguments:
+        collection_name: The name of the collection to add
+        model_name: The name of the embedding model to use
+        input_dir: The directory containing the source code to be added
+        input_lang: The language of the source code
+        max_tokens: The maximum number of tokens for each chunk of input source code
+    """
+    # TODO: import factory
+    import json
+    from pathlib import Path
+    from rich.console import Console
+    from janus.cli.constants import collections_config_file, get_collections_config
+    from janus.embedding.vectorize import ChromaDBVectorizer
+    from janus.language.binary import BinarySplitter
+    from janus.language.mumps import MumpsSplitter
+    from janus.language.naive.registry import CUSTOM_SPLITTERS
+    from janus.language.treesitter import TreeSitterSplitter
+    from janus.utils.enums import LANGUAGES
+    console = Console()
+    # Must be checked before the collection is (re)created below, otherwise the
+    # summary could not tell "created" apart from "added to".
+    added_to = _check_collection(collection_name, input_dir)
+    collections_config = get_collections_config()
+    # Uses the same salmon1 style as the summary messages further down.
+    with console.status(
+        f"Adding collection: [bold salmon1]{collection_name}[/bold salmon1]",
+        spinner="arrow3",
+    ):
+        vectorizer_factory = ChromaDBVectorizer()
+        vectorizer = vectorizer_factory.create_vectorizer(
+            path=db_loc, config=collections_config
+        )
+        vectorizer.get_or_create_collection(collection_name, model_name=model_name)
+        input_dir = Path(input_dir)
+        suffixes = [f".{ext}" for ext in LANGUAGES[input_lang]["suffixes"]]
+        # rglob() already recurses, so the pattern needs no leading "**/".
+        input_paths = [file for ext in suffixes for file in input_dir.rglob(f"*{ext}")]
+        # NOTE(review): a language that is registered in CUSTOM_SPLITTERS but is
+        # neither "mumps" nor "binary" leaves ``splitter`` unbound, which fails in
+        # the loop below as soon as there is a file to split. Confirm whether
+        # such a language can reach this command.
+        if input_lang in CUSTOM_SPLITTERS:
+            if input_lang == "mumps":
+                splitter = MumpsSplitter(
+                    max_tokens=max_tokens,
+                )
+            elif input_lang == "binary":
+                splitter = BinarySplitter(
+                    max_tokens=max_tokens,
+                )
+        else:
+            splitter = TreeSitterSplitter(
+                language=input_lang,
+                max_tokens=max_tokens,
+            )
+        for input_path in input_paths:
+            input_block = splitter.split(input_path)
+            vectorizer.add_nodes_recursively(
+                input_block,
+                collection_name,
+                input_path.name,
+            )
+    # Every non-directory entry below input_dir, used for the "skipped" count.
+    total_files = sum(1 for path in input_dir.rglob("*") if not path.is_dir())
+    if added_to:
+        heading = f"\nAdded to [bold salmon1]{collection_name}[/bold salmon1]:\n"
+        model_line = f"  Embedding Model: [green]{model_name}[/green]\n"
+    else:
+        heading = f"\nCreated [bold salmon1]{collection_name}[/bold salmon1]:\n"
+        model_line = f"  Embedding Model: '{model_name}'\n"
+    # The summary carries Rich markup, so it has to go through the Rich console;
+    # the built-in print() used before emitted the tags literally.
+    console.print(
+        f"{heading}{model_line}"
+        f"  Input Directory: {input_dir.absolute()}\n"
+        f"  {input_lang} [green]{suffixes}[/green] Files: "
+        f"{len(input_paths)}\n"
+        "  Other Files (skipped): "
+        f"{total_files - len(input_paths)}\n"
+    )
+    with open(collections_config_file, "w") as f:
+        json.dump(vectorizer.config, f, indent=2)
+@db.command(
+    "rm",
+    help="Remove a collection from the database.",
+)
+def db_rm(
+    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
+    confirm: Annotated[
+        bool,
+        typer.Option(
+            "--yes",
+            "-y",
+            help="Confirm the removal of the collection.",
+        ),
+    ] = False,
+) -> None:
+    """Remove a collection from the database
+    Arguments:
+        collection_name: The name of the collection to remove
+        confirm: When True, skip the interactive confirmation prompt
+    Raises:
+        typer.Abort: if the user declines the confirmation prompt
+    """
+    from rich import print
+    from rich.prompt import Confirm
+    from janus.embedding.collections import Collections
+    from janus.embedding.database import ChromaEmbeddingDatabase
+    # Prompt only when --yes was not passed. The option previously had no
+    # default, which made --yes mandatory and this prompt unreachable.
+    if not confirm and not Confirm.ask(
+        f"\nAre you sure you want to [bold red]remove[/bold red] "
+        f"[bold salmon1]{collection_name}[/bold salmon1]?",
+    ):
+        raise typer.Abort()
+    # Named ``database`` so the module-level ``db`` Typer app is not shadowed.
+    database = ChromaEmbeddingDatabase(db_loc)
+    Collections(database).delete(collection_name)
+    # Rich's print is imported above so the markup is rendered, not echoed.
+    print(
+        f"[bold red]Removed[/bold red] collection "
+        f"[bold salmon1]{collection_name}[/bold salmon1]"
+    )
+def _check_collection(collection_name: str, input_dir: str | Path) -> bool:
+    """Report whether ``collection_name`` can already be fetched from the database
+    Arguments:
+        collection_name: The name of the collection to look up
+        input_dir: Not used by the lookup; it only appeared in a confirmation
+            prompt that is currently disabled
+    Returns:
+        True when the lookup succeeds, False when it raises
+        InvalidCollectionException
+    """
+    from chromadb.errors import InvalidCollectionException
+    from janus.embedding.collections import Collections
+    from janus.embedding.database import ChromaEmbeddingDatabase
+    known_collections = Collections(ChromaEmbeddingDatabase(db_loc))
+    try:
+        known_collections.get(collection_name)
+    except InvalidCollectionException:
+        return False
+    # A prompt asking whether to update the existing collection with the
+    # contents of input_dir (aborting on "no") used to sit here; it is disabled.
+    return True

janus/cli/diagram.py ADDED Viewed

@@ -0,0 +1,178 @@
+from pathlib import Path
+from typing import Optional
+import click
+import typer
+from typing_extensions import Annotated
+from janus.cli.constants import REFINERS
+from janus.language.naive.registry import CUSTOM_SPLITTERS
+from janus.utils.enums import LANGUAGES
+def diagram(
+    input_dir: Annotated[
+        Path,
+        typer.Option(
+            "--input",
+            "-i",
+            help="The directory containing the source code to be translated. "
+            "The files should all be in one flat directory.",
+        ),
+    ],
+    language: Annotated[
+        str,
+        typer.Option(
+            "--language",
+            "-l",
+            help="The language of the source code.",
+            click_type=click.Choice(sorted(LANGUAGES)),
+        ),
+    ],
+    output_dir: Annotated[
+        Path,
+        typer.Option(
+            "--output-dir", "-o", help="The directory to store the translated code in."
+        ),
+    ],
+    llm_name: Annotated[
+        str,
+        typer.Option(
+            "--llm",
+            "-L",
+            help="The custom name of the model set with 'janus llm add'.",
+        ),
+    ],
+    failure_dir: Annotated[
+        Optional[Path],
+        typer.Option(
+            "--failure-directory",
+            "-f",
+            help="The directory to store failure files during translation",
+        ),
+    ] = None,
+    max_prompts: Annotated[
+        int,
+        typer.Option(
+            "--max-prompts",
+            "-m",
+            help="The maximum number of times to prompt a model on one functional block "
+            "before exiting the application. This is to prevent wasting too much money.",
+        ),
+    ] = 10,
+    overwrite: Annotated[
+        bool,
+        typer.Option(
+            "--overwrite/--preserve",
+            help="Whether to overwrite existing files in the output directory",
+        ),
+    ] = False,
+    temperature: Annotated[
+        float,
+        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
+    ] = 0.7,
+    collection: Annotated[
+        Optional[str],
+        typer.Option(
+            "--collection",
+            "-c",
+            help="If set, will put the translated result into a Chroma DB "
+            "collection with the name provided.",
+        ),
+    ] = None,
+    diagram_type: Annotated[
+        str,
+        typer.Option(
+            "--diagram-type", "-dg", help="Diagram type to generate in PLANTUML"
+        ),
+    ] = "Activity",
+    add_documentation: Annotated[
+        bool,
+        typer.Option(
+            "--add-documentation/--no-documentation",
+            "-ad",
+            help="Whether to use documentation in generation",
+        ),
+    ] = False,
+    splitter_type: Annotated[
+        str,
+        typer.Option(
+            "-S",
+            "--splitter",
+            help="Name of custom splitter to use",
+            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
+        ),
+    ] = "file",
+    refiner_types: Annotated[
+        list[str],
+        typer.Option(
+            "-r",
+            "--refiner",
+            help="List of refiner types to use. Add -r for each refiner to use in "
+            "refinement chain",
+            click_type=click.Choice(list(REFINERS.keys())),
+        ),
+    ] = ["JanusRefiner"],
+    retriever_type: Annotated[
+        Optional[str],
+        typer.Option(
+            "-R",
+            "--retriever",
+            help="Name of custom retriever to use",
+            click_type=click.Choice(["active_usings", "language_docs"]),
+        ),
+    ] = None,
+):
+    # CLI entry point: builds a DiagramGenerator from the options above and runs
+    # its translate() over input_dir, writing results to output_dir.
+    # Documented with comments rather than a docstring because Typer falls back
+    # to the docstring for the command's help text and the place where this
+    # function is registered is not part of this module.
+    from janus.cli.constants import db_loc, get_collections_config
+    from janus.converter.diagram import DiagramGenerator
+    # Map the refiner names chosen on the command line to their REFINERS
+    # entries. This rebinds the name to a new list, so the shared default list
+    # in the signature is never mutated.
+    refiner_types = [REFINERS[r] for r in refiner_types]
+    model_arguments = dict(temperature=temperature)
+    collections_config = get_collections_config()
+    diagram_generator = DiagramGenerator(
+        model=llm_name,
+        model_arguments=model_arguments,
+        source_language=language,
+        max_prompts=max_prompts,
+        db_path=db_loc,
+        db_config=collections_config,
+        splitter_type=splitter_type,
+        refiner_types=refiner_types,
+        retriever_type=retriever_type,
+        diagram_type=diagram_type,
+        add_documentation=add_documentation,
+    )
+    diagram_generator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
+def render(
+    input_dir: Annotated[
+        str,
+        typer.Option(
+            "--input",
+            "-i",
+        ),
+    ],
+    output_dir: Annotated[str, typer.Option("--output", "-o")],
+):
+    # CLI entry point: for every *.json file found recursively under input_dir,
+    # writes the file's "output" field to a mirrored .txt path under output_dir,
+    # runs the PlantUML jar on that .txt file and then deletes the .txt file.
+    # Documented with comments rather than a docstring because Typer falls back
+    # to the docstring for the command's help text and the place where this
+    # function is registered is not part of this module.
+    import json
+    import subprocess  # nosec
+    from janus.cli.constants import homedir
+    input_dir = Path(input_dir)
+    output_dir = Path(output_dir)
+    # The jar location does not depend on the file being rendered.
+    jar_path = homedir / ".janus/lib/plantuml.jar"
+    for input_file in input_dir.rglob("*.json"):
+        with open(input_file, "r") as f:
+            data = json.load(f)
+        output_file = output_dir / input_file.relative_to(input_dir).with_suffix(".txt")
+        # rglob() descends into subdirectories, so the mirrored path can be
+        # several levels deep (and output_dir itself may not exist yet); create
+        # every missing ancestor instead of only the immediate parent.
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        # The stored text contains literal backslash-n sequences; turn them
+        # into real newlines before handing the file to PlantUML.
+        text = data["output"].replace("\\n", "\n").strip()
+        output_file.write_text(text)
+        # NOTE(review): presumably PlantUML writes its rendering next to the
+        # .txt input, which is why the input can be removed afterwards. The
+        # exit status is not checked - confirm that this is intended.
+        subprocess.run(["java", "-jar", jar_path, output_file])  # nosec
+        output_file.unlink()

janus/cli/document.py ADDED Viewed

@@ -0,0 +1,174 @@
+from pathlib import Path
+from typing import Optional
+import click
+import typer
+from typing_extensions import Annotated
+from janus.cli.constants import REFINERS
+from janus.language.naive.registry import CUSTOM_SPLITTERS
+from janus.utils.enums import LANGUAGES
+def document(
+    input_dir: Annotated[
+        Path,
+        typer.Option(
+            "--input",
+            "-i",
+            help="The directory containing the source code to be translated. "
+            "The files should all be in one flat directory.",
+        ),
+    ],
+    language: Annotated[
+        str,
+        typer.Option(
+            "--language",
+            "-l",
+            help="The language of the source code.",
+            click_type=click.Choice(sorted(LANGUAGES)),
+        ),
+    ],
+    output_dir: Annotated[
+        Path,
+        typer.Option(
+            "--output-dir", "-o", help="The directory to store the translated code in."
+        ),
+    ],
+    llm_name: Annotated[
+        str,
+        typer.Option(
+            "--llm",
+            "-L",
+            help="The custom name of the model set with 'janus llm add'.",
+        ),
+    ],
+    failure_dir: Annotated[
+        Optional[Path],
+        typer.Option(
+            "--failure-directory",
+            "-f",
+            help="The directory to store failure files during documentation",
+        ),
+    ] = None,
+    max_prompts: Annotated[
+        int,
+        typer.Option(
+            "--max-prompts",
+            "-m",
+            help="The maximum number of times to prompt a model on one functional block "
+            "before exiting the application. This is to prevent wasting too much money.",
+        ),
+    ] = 10,
+    overwrite: Annotated[
+        bool,
+        typer.Option(
+            "--overwrite/--preserve",
+            help="Whether to overwrite existing files in the output directory",
+        ),
+    ] = False,
+    doc_mode: Annotated[
+        str,
+        typer.Option(
+            "--doc-mode",
+            "-d",
+            help="The documentation mode.",
+            click_type=click.Choice(["cloze", "summary", "multidoc", "requirements"]),
+        ),
+    ] = "cloze",
+    comments_per_request: Annotated[
+        Optional[int],
+        typer.Option(
+            "--comments-per-request",
+            "-rc",
+            help="The maximum number of comments to generate per request when using "
+            "Cloze documentation mode.",
+        ),
+    ] = None,
+    drop_comments: Annotated[
+        bool,
+        typer.Option(
+            "--drop-comments/--keep-comments",
+            help="Whether to drop or keep comments in the code sent to the LLM",
+        ),
+    ] = False,
+    temperature: Annotated[
+        float,
+        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
+    ] = 0.7,
+    collection: Annotated[
+        Optional[str],
+        typer.Option(
+            "--collection",
+            "-c",
+            help="If set, will put the translated result into a Chroma DB "
+            "collection with the name provided.",
+        ),
+    ] = None,
+    splitter_type: Annotated[
+        str,
+        typer.Option(
+            "-S",
+            "--splitter",
+            help="Name of custom splitter to use",
+            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
+        ),
+    ] = "file",
+    refiner_types: Annotated[
+        list[str],
+        typer.Option(
+            "-r",
+            "--refiner",
+            help="List of refiner types to use. Add -r for each refiner to use in "
+            "refinement chain",
+            click_type=click.Choice(list(REFINERS.keys())),
+        ),
+    ] = ["JanusRefiner"],
+    retriever_type: Annotated[
+        Optional[str],
+        typer.Option(
+            "-R",
+            "--retriever",
+            help="Name of custom retriever to use",
+            click_type=click.Choice(["active_usings", "language_docs"]),
+        ),
+    ] = None,
+    max_tokens: Annotated[
+        Optional[int],
+        typer.Option(
+            "--max-tokens",
+            "-M",
+            help="The maximum number of tokens the model will take in. "
+            "If unspecified, model's default max will be used.",
+        ),
+    ] = None,
+):
+    # CLI entry point: picks a documenter class from doc_mode, builds it from
+    # the options above and runs its translate() over input_dir, writing
+    # results to output_dir.
+    # Documented with comments rather than a docstring because Typer falls back
+    # to the docstring for the command's help text and the place where this
+    # function is registered is not part of this module.
+    from janus.cli.constants import db_loc, get_collections_config
+    from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
+    from janus.converter.requirements import RequirementsDocumenter
+    # Map the refiner names chosen on the command line to their REFINERS
+    # entries. This rebinds the name to a new list, so the shared default list
+    # in the signature is never mutated.
+    refiner_types = [REFINERS[r] for r in refiner_types]
+    model_arguments = dict(temperature=temperature)
+    collections_config = get_collections_config()
+    # Constructor arguments shared by every documenter variant.
+    kwargs = dict(
+        model=llm_name,
+        model_arguments=model_arguments,
+        source_language=language,
+        max_prompts=max_prompts,
+        max_tokens=max_tokens,
+        db_path=db_loc,
+        db_config=collections_config,
+        splitter_type=splitter_type,
+        refiner_types=refiner_types,
+        retriever_type=retriever_type,
+    )
+    # Only the cloze documenter takes comments_per_request; the other three
+    # take drop_comments instead. "summary" is handled by the final branch.
+    if doc_mode == "cloze":
+        documenter = ClozeDocumenter(comments_per_request=comments_per_request, **kwargs)
+    elif doc_mode == "multidoc":
+        documenter = MultiDocumenter(drop_comments=drop_comments, **kwargs)
+    elif doc_mode == "requirements":
+        documenter = RequirementsDocumenter(drop_comments=drop_comments, **kwargs)
+    else:
+        documenter = Documenter(drop_comments=drop_comments, **kwargs)
+    documenter.translate(input_dir, output_dir, failure_dir, overwrite, collection)

janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl

janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl