janus-llm 4.3.5__py3-none-any.whl → 4.5.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
- janus/__init__.py +1 -1
- janus/cli/aggregate.py +2 -2
- janus/cli/cli.py +6 -0
- janus/cli/constants.py +6 -0
- janus/cli/diagram.py +36 -7
- janus/cli/document.py +10 -1
- janus/cli/llm.py +7 -3
- janus/cli/partition.py +10 -1
- janus/cli/pipeline.py +126 -0
- janus/cli/self_eval.py +10 -3
- janus/cli/translate.py +10 -1
- janus/converter/__init__.py +2 -0
- janus/converter/_tests/test_translate.py +6 -5
- janus/converter/chain.py +100 -0
- janus/converter/converter.py +467 -90
- janus/converter/diagram.py +12 -8
- janus/converter/document.py +17 -7
- janus/converter/evaluate.py +174 -147
- janus/converter/partition.py +6 -11
- janus/converter/passthrough.py +29 -0
- janus/converter/pool.py +74 -0
- janus/converter/requirements.py +7 -40
- janus/converter/translate.py +2 -58
- janus/language/_tests/test_combine.py +1 -0
- janus/language/block.py +115 -5
- janus/llm/model_callbacks.py +6 -0
- janus/llm/models_info.py +19 -0
- janus/metrics/_tests/test_reading.py +48 -4
- janus/metrics/_tests/test_rouge_score.py +5 -11
- janus/metrics/metric.py +47 -124
- janus/metrics/reading.py +48 -28
- janus/metrics/rouge_score.py +21 -34
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/eval_parsers/incose_parser.py +3 -3
- janus/parsers/reqs_parser.py +3 -3
- janus/prompts/templates/cyclic/human.txt +16 -0
- janus/prompts/templates/cyclic/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +1 -1
- janus/prompts/templates/extract_variables/human.txt +5 -0
- janus/prompts/templates/extract_variables/system.txt +1 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA +14 -15
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/RECORD +46 -40
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/WHEEL +1 -1
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/LICENSE +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/entry_points.txt +0 -0
janus/converter/converter.py
CHANGED
@@ -1,6 +1,7 @@
 import functools
 import json
 import time
+from copy import deepcopy
 from pathlib import Path
 from typing import Any
 
@@ -16,7 +17,7 @@ from openai import BadRequestError, RateLimitError
 from pydantic import ValidationError
 
 from janus.embedding.vectorize import ChromaDBVectorizer
-from janus.language.block import CodeBlock, TranslatedCodeBlock
+from janus.language.block import BlockCollection, CodeBlock, TranslatedCodeBlock
 from janus.language.combine import Combiner
 from janus.language.naive.registry import CUSTOM_SPLITTERS
 from janus.language.splitter import (
@@ -76,14 +77,22 @@ class Converter:
         source_language: str = "fortran",
         max_prompts: int = 10,
         max_tokens: int | None = None,
-
+        prompt_templates: list[str] | str = ["simple"],
         db_path: str | None = None,
         db_config: dict[str, Any] | None = None,
         protected_node_types: tuple[str, ...] = (),
         prune_node_types: tuple[str, ...] = (),
         splitter_type: str = "file",
-        refiner_types: list[type[JanusRefiner]] = [JanusRefiner],
+        refiner_types: list[type[JanusRefiner] | str] = [JanusRefiner],
         retriever_type: str | None = None,
+        combine_output: bool = True,
+        use_janus_inputs: bool = False,
+        target_language: str = "json",
+        target_version: str | None = None,
+        input_types: set[str] | str | None = None,
+        input_labels: set[str] | str | None = None,
+        output_type: str | None = None,
+        output_label: str | None = None,
     ) -> None:
         """Initialize a Converter instance.
 
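The expanded `__init__` signature above adds chaining- and routing-related options: named prompt templates, refiner types given as classes or registry names, janus-JSON input mode, an explicit target language/version, and input/output type and label filters. A minimal configuration sketch, assuming a concrete Converter (or subclass) is constructed directly with these keyword arguments; the model and template values are placeholders, not defaults shipped with janus:

    # Hypothetical configuration sketch; argument names mirror the signature above.
    converter = Converter(
        model="gpt-4o",                       # placeholder; passed through to set_model()
        source_language="fortran",
        prompt_templates=["simple"],          # str or list[str]; later templates refine earlier output
        refiner_types=[],                     # classes, or string keys from janus.cli.constants.REFINERS
        target_language="python",
        target_version="3.10",
        use_janus_inputs=False,               # True: read prior janus .json outputs instead of source files
        combine_output=True,                  # False: keep per-child outputs separate in the saved JSON
        input_types=None, input_labels=None,  # optional filters applied by _filter_blocks()
        output_type=None, output_label=None,  # tags recorded on the produced blocks
    )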
@@ -96,7 +105,7 @@ class Converter:
             max_prompts: The maximum number of prompts to try before giving up.
             max_tokens: The maximum number of tokens to use in the LLM. If `None`, the
                 converter will use half the model's token limit.
-
+            prompt_templates: The name of the prompt templates to use.
             db_path: The path to the database to use for vectorization.
             db_config: The configuration for the database.
             protected_node_types: A set of node types that aren't to be merged.
@@ -111,12 +120,21 @@ class Converter:
                 - "active_usings"
                 - "language_docs"
                 - None
+            combine_output: Whether to combine the output into a single file or not.
+            use_janus_inputs: Whether to use janus inputs or not.
+            target_language: The target programming language.
+            target_version: The target programming language version.
+            input_types: The types of input to accept.
+            input_labels: The labels of input to accept.
+            output_type: The type of output to produce.
+            output_label: The label of output to produce.
         """
         self._changed_attrs: set = set()
 
         self.max_prompts: int = max_prompts
         self._max_tokens: int | None = max_tokens
         self.override_token_limit: bool = max_tokens is not None
+        self._combine_output = combine_output
 
         self._model_name: str
         self._custom_model_arguments: dict[str, Any]
@@ -124,13 +142,16 @@ class Converter:
         self._source_language: str
         self._source_suffixes: list[str]
 
-        self._target_language
-        self._target_suffix
+        self._target_language: str
+        self._target_suffix: str
+        self._target_version: str | None
+        self.set_target_language(target_language, target_version)
+        self._use_janus_inputs = use_janus_inputs
 
         self._protected_node_types: tuple[str, ...] = ()
         self._prune_node_types: tuple[str, ...] = ()
         self._max_tokens: int | None = max_tokens
-        self.
+        self._prompt_template_names: list[str]
         self._db_path: str | None
         self._db_config: dict[str, Any] | None
 
@@ -142,7 +163,7 @@ class Converter:
         self._combiner: Combiner = Combiner()
 
         self._splitter_type: str
-        self._refiner_types: list[type[JanusRefiner]]
+        self._refiner_types: list[type[JanusRefiner] | str]
         self._retriever_type: str | None
 
         self._splitter: Splitter
@@ -153,13 +174,20 @@ class Converter:
         self.set_refiner_types(refiner_types=refiner_types)
         self.set_retriever(retriever_type=retriever_type)
         self.set_model(model_name=model, **model_arguments)
-        self.
+        self.set_prompts(prompt_templates=prompt_templates)
         self.set_source_language(source_language)
         self.set_protected_node_types(protected_node_types)
         self.set_prune_node_types(prune_node_types)
         self.set_db_path(db_path=db_path)
         self.set_db_config(db_config=db_config)
 
+        self._input_types = input_types
+        self._input_labels = input_labels
+        self._output_type = output_type
+        self._output_label = output_label
+
+        self._load_parameters()
+
         # Child class must call this. Should we enforce somehow?
         # self._load_parameters()
 
@@ -174,7 +202,7 @@ class Converter:
 
     def _load_parameters(self) -> None:
         self._load_model()
-        self.
+        self._load_translation_chain()
         self._load_retriever()
         self._load_refiner_chain()
         self._load_splitter()
@@ -195,28 +223,30 @@ class Converter:
         self._model_name = model_name
         self._custom_model_arguments = custom_arguments
 
-    def
+    def set_prompts(self, prompt_templates: list[str] | str) -> None:
         """Validate and set the prompt template name.
 
         Arguments:
-
-                (see janus/prompts/templates) or
+            prompt_templates: name of prompt template directories
+                (see janus/prompts/templates) or paths to directories.
         """
-
+        if isinstance(prompt_templates, str):
+            self._prompt_template_names = [prompt_templates]
+        else:
+            self._prompt_template_names = prompt_templates
 
     def set_splitter(self, splitter_type: str) -> None:
         """Validate and set the prompt template name.
 
         Arguments:
-
-                (see janus/prompts/templates) or path to a directory.
+            splitter_type: the type of splitter to use
         """
         if splitter_type not in CUSTOM_SPLITTERS:
             raise ValueError(f'Splitter type "{splitter_type}" does not exist.')
 
         self._splitter_type = splitter_type
 
-    def set_refiner_types(self, refiner_types: list[type[JanusRefiner]]) -> None:
+    def set_refiner_types(self, refiner_types: list[type[JanusRefiner] | str]) -> None:
        """Validate and set the refiner type
 
        Arguments:
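`set_prompts` now accepts either a single template name or a list; a string is normalized to a one-element list, and `_load_translation_chain` (next hunk) applies the templates in order, feeding each completion into the following template. A small sketch of the calling convention, with template names taken from the package's templates directory as an assumption:

    converter.set_prompts("simple")               # stored internally as ["simple"]
    converter.set_prompts(["simple", "cyclic"])   # the second template sees the first completion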
@@ -329,25 +359,51 @@ class Converter:
         self._max_tokens = int(token_limit * self._llm.input_token_proportion)
 
     @run_if_changed(
-        "
+        "_prompt_template_names",
         "_source_language",
         "_model_name",
-        "
+        "_target_language",
+        "_target_version",
     )
-    def
-
-
-        If the relevant fields have not been changed since the last time this
-        method was called, nothing happens.
-        """
+    def _load_translation_chain(self) -> None:
+        prompt_template_name = self._prompt_template_names[0]
         prompt_engine = MODEL_PROMPT_ENGINES[self._llm.short_model_id](
             source_language=self._source_language,
-            prompt_template=
+            prompt_template=prompt_template_name,
+            target_language=self._target_language,
+            target_version=self._target_version,
         )
-
-        self.
-
+        prompt = prompt_engine.prompt
+        self._translation_chain = RunnableParallel(
+            prompt_value=lambda x, prompt=prompt: prompt.invoke(x),
+            original_inputs=RunnablePassthrough(),
+        ) | RunnableParallel(
+            completion=lambda x: self._llm.invoke(x["prompt_value"]),
+            original_inputs=lambda x: x["original_inputs"],
+            prompt_value=lambda x: x["prompt_value"],
         )
+        for prompt_template_name in self._prompt_template_names[1:]:
+            prompt_engine = MODEL_PROMPT_ENGINES[self._llm.short_model_id](
+                source_language=self._source_language,
+                prompt_template=prompt_template_name,
+                target_language=self._target_language,
+                target_version=self._target_version,
+            )
+            prompt = prompt_engine.prompt
+            self._translation_chain = (
+                self._translation_chain
+                | RunnableParallel(
+                    prompt_value=lambda x, prompt=prompt: prompt.invoke(
+                        dict(completion=x["completion"], **x["original_inputs"])
+                    ),
+                    original_inputs=lambda x: x["original_inputs"],
+                )
+                | RunnableParallel(
+                    completion=lambda x: self._llm.invoke(x["prompt_value"]),
+                    original_inputs=lambda x: x["original_inputs"],
+                    prompt_value=lambda x: x["prompt_value"],
+                )
+            )
 
     @run_if_changed("_db_path", "_db_config")
     def _load_vectorizer(self) -> None:
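The new `_load_translation_chain` threads three keys through every stage: `prompt_value` (the rendered prompt), `completion` (the LLM response), and `original_inputs` (the untouched inputs, so a follow-up template can reference both). A stripped-down sketch of the same `RunnableParallel` pattern with a stand-in prompt renderer and LLM; the variable name `SOURCE_CODE` and the helpers are illustrative, not janus APIs:

    from langchain_core.runnables import RunnableParallel, RunnablePassthrough

    def render_prompt(inputs: dict) -> str:
        # Stand-in for the prompt engine used above.
        return f"Translate this code:\n{inputs['SOURCE_CODE']}"

    def fake_llm(prompt_value: str) -> str:
        # Stand-in for self._llm.invoke(...).
        return f"<completion of: {prompt_value!r}>"

    chain = RunnableParallel(
        prompt_value=lambda x: render_prompt(x),
        original_inputs=RunnablePassthrough(),
    ) | RunnableParallel(
        completion=lambda x: fake_llm(x["prompt_value"]),
        original_inputs=lambda x: x["original_inputs"],
        prompt_value=lambda x: x["prompt_value"],
    )

    result = chain.invoke({"SOURCE_CODE": "print('hi')"})
    # result keeps all three keys, so a second template can be piped on with
    # access to both the completion and the original inputs.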
@@ -370,11 +426,41 @@ class Converter:
 
     @run_if_changed("_refiner_types", "_model_name", "max_prompts", "_parser")
     def _load_refiner_chain(self) -> None:
-
-
-
-
-
+        from janus.cli.constants import REFINERS
+
+        if len(self._refiner_types) == 0:
+            self._refiner_chain = RunnableLambda(
+                lambda x: self._parser.parse(x["completion"])
+            )
+            return
+        refiner_type = self._refiner_types[0]
+        if isinstance(refiner_type, str):
+            if refiner_type not in REFINERS:
+                raise ValueError(f"Error: unable to find refiner type {refiner_type}")
+            refiner_type = REFINERS[refiner_type]
+        if len(self._refiner_types) == 1:
+            self._refiner_chain = RunnableLambda(
+                lambda x, refiner_type=refiner_type: refiner_type(
+                    llm=self._llm,
+                    parser=self._parser,
+                    max_retries=self.max_prompts,
+                ).parse_completion(**x)
+            )
+            return
+        else:
+            self._refiner_chain = RunnableParallel(
+                completion=lambda x, refiner_type=refiner_type: refiner_type(
+                    llm=self._llm,
+                    parser=self._base_parser,
+                    max_retries=self.max_prompts,
+                ).parse_completion(**x),
+                prompt_value=lambda x: x["prompt_value"],
+            )
+        for refiner_type in self._refiner_types[1:-1]:
+            if isinstance(refiner_type, str):
+                if refiner_type not in REFINERS:
+                    raise ValueError(f"Error: unable to find refiner type {refiner_type}")
+                refiner_type = REFINERS[refiner_type]
            # NOTE: Do NOT remove refiner_type=refiner_type from lambda.
            # Due to lambda capture, must be present or chain will not
            # be correctly constructed.
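The NOTE about `refiner_type=refiner_type` guards against Python's late-binding closures: a lambda created in a loop looks the loop variable up when it is called, not when it is defined, so without the default-argument trick every stage of the chain would end up using the last refiner. A minimal, self-contained illustration of the pitfall and the fix:

    # Late binding: every lambda sees the final value of i.
    broken = [lambda x: x + i for i in range(3)]
    print([f(0) for f in broken])   # [2, 2, 2]

    # A default argument captures the current value at definition time.
    fixed = [lambda x, i=i: x + i for i in range(3)]
    print([f(0) for f in fixed])    # [0, 1, 2]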
@@ -394,9 +480,17 @@ class Converter:
                 ).parse_completion(**x)
             )
 
-    @run_if_changed(
+    @run_if_changed(
+        "_parser",
+        "_retriever",
+        "_prompt",
+        "_llm",
+        "_refiner_chain",
+        "_target_language",
+        "_target_version",
+    )
     def _load_chain(self):
-        self.chain = self.
+        self.chain = self.get_chain()
 
     def _input_runnable(self) -> Runnable:
         return RunnableParallel(
@@ -404,6 +498,12 @@ class Converter:
             context=self._retriever,
         )
 
+    def get_chain(self) -> Runnable:
+        """
+        Gets a chain that can be executed by langchain
+        """
+        return self._input_runnable() | self._translation_chain | self._refiner_chain
+
     def translate(
         self,
         input_directory: str | Path,
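`get_chain` exposes the full runnable (input preparation, translation chain, refiner chain), and `_run_chain` further down invokes it on a block's original `CodeBlock`. A hedged usage sketch, assuming `converter` is fully configured and `block` is a block produced by the splitter:

    # Sketch only; mirrors what _run_chain does internally.
    chain = converter.get_chain()
    parsed_output = chain.invoke(block.original)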
@@ -436,22 +536,24 @@ class Converter:
             failure_directory.mkdir(parents=True)
 
         input_paths = []
-
+        if self._use_janus_inputs:
+            source_language = "janus"
+            source_suffixes = [".json"]
+        else:
+            source_language = self._source_language
+            source_suffixes = self._source_suffixes
+        for ext in source_suffixes:
             input_paths.extend(input_directory.rglob(f"**/*{ext}"))
 
         log.info(f"Input directory: {input_directory.absolute()}")
-        log.info(
-            f"{self._source_language} {self._source_suffixes} files: "
-            f"{len(input_paths)}"
-        )
+        log.info(f"{source_language} {source_suffixes} files: " f"{len(input_paths)}")
         log.info(
             "Other files (skipped): "
             f"{len(list(input_directory.iterdir())) - len(input_paths)}\n"
         )
         if output_directory is not None:
             output_paths = [
-                output_directory
-                / p.relative_to(input_directory).with_suffix(self._target_suffix)
+                output_directory / p.relative_to(input_directory).with_suffix(".json")
                 for p in input_paths
             ]
         else:
@@ -459,8 +561,7 @@ class Converter:
 
         if failure_directory is not None:
             failure_paths = [
-                failure_directory
-                / p.relative_to(input_directory).with_suffix(self._target_suffix)
+                failure_directory / p.relative_to(input_directory).with_suffix(".json")
                 for p in input_paths
             ]
         else:
@@ -484,12 +585,31 @@ class Converter:
         for in_path, out_path, fail_path in in_out_pairs:
             # Translate the file, skip it if there's a rate limit error
             log.info(f"Processing {in_path.relative_to(input_directory)}")
-
-
+            if self._use_janus_inputs:
+                out_block = self.translate_janus_file(in_path, fail_path)
+            else:
+                out_block = self.translate_file(in_path, fail_path)
+
+            def _get_total_cost(block):
+                if isinstance(block, list):
+                    return sum(_get_total_cost(b) for b in block)
+                return block.total_cost
+
+            total_cost += _get_total_cost(out_block)
             log.info(f"Current Running Cost: {total_cost}")
 
-            #
-
+            # For files where translation failed, write to failure path instead
+
+            def _has_empty(block):
+                if isinstance(block, BlockCollection):
+                    return len(block.blocks) == 0 or any(
+                        _has_empty(b) for b in block.blocks
+                    )
+                return not block.translated
+
+            if _has_empty(out_block):
+                if fail_path is not None:
+                    self._save_to_file(out_block, fail_path)
                 continue
 
             if collection_name is not None:
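Because a single input can now yield nested `BlockCollection`s (or lists of blocks), both cost accounting and the failure check recurse over the result instead of reading one attribute. A self-contained sketch of the same accumulation pattern over a stand-in block type (not a janus class):

    from dataclasses import dataclass

    @dataclass
    class FakeBlock:
        total_cost: float

    def get_total_cost(block):
        # Mirrors the recursive helper above: lists are summed, leaves report their own cost.
        if isinstance(block, list):
            return sum(get_total_cost(b) for b in block)
        return block.total_cost

    nested = [FakeBlock(1.0), [FakeBlock(2.5), FakeBlock(0.5)]]
    print(get_total_cost(nested))   # 4.0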
@@ -501,37 +621,58 @@ class Converter:
 
             # Make sure the tree's code has been consolidated at the top level
             # before writing to file
-
+            for b in out_block.blocks:
+                self._combiner.combine(b)
             if out_path is not None and (overwrite or not out_path.exists()):
                 self._save_to_file(out_block, out_path)
 
         log.info(f"Total cost: ${total_cost:,.2f}")
 
-    def
-
-
-
-
-
-
-
+    def _filter_blocks(self, code_block):
+        if isinstance(code_block, BlockCollection):
+            input_blocks = list(code_block.blocks)
+        else:
+            input_blocks = [code_block]
+        if self._input_types is not None:
+            if isinstance(self._input_types, str):
+                self._input_types = set([self._input_types])
+            input_blocks = [
+                b
+                for b in input_blocks
+                if isinstance(b, BlockCollection) or b.block_type in self._input_types
+            ]
+        if self._input_labels is not None:
+            if isinstance(self._input_labels, str):
+                self._input_labels = set([self._input_labels])
+            input_blocks = [
+                b
+                for b in input_blocks
+                if isinstance(b, BlockCollection) or b.block_label in self._input_labels
+            ]
+        return input_blocks
 
-
-
-
-
-
+    def translate_blocks(
+        self,
+        code_block: CodeBlock | BlockCollection,
+        failure_path: Path | None = None,
+    ) -> BlockCollection | TranslatedCodeBlock:
+        input_blocks = self._filter_blocks(code_block)
+        output_blocks = []
+        for b in input_blocks:
+            output_blocks.append(self.translate_block(b, failure_path))
+        return BlockCollection(output_blocks, code_block.previous_generations)
+
+    def translate_block(
+        self,
+        input_block: CodeBlock,
+        failure_path: Path | None = None,
+    ) -> TranslatedCodeBlock:
         self._load_parameters()
-        filename = file.name
-
-        input_block = self._split_file(file)
-        t0 = time.time()
         output_block = self._iterative_translate(input_block, failure_path)
-        output_block.processing_time = time.time() - t0
         if output_block.translated:
             completeness = output_block.translation_completeness
             log.info(
-                f"[{
+                f"[{output_block.name}] Translation complete\n"
                 f" {completeness:.2%} of input successfully translated\n"
                 f" Total cost: ${output_block.total_cost:,.2f}\n"
                 f" Output CodeBlock Structure:\n{input_block.tree_str()}\n"
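`_filter_blocks` narrows the work to blocks whose `block_type`/`block_label` match the `input_types`/`input_labels` given at construction, which is how a converter in a chain picks up only the outputs meant for it. A sketch of that selection logic with stand-in objects (no janus block classes are imported here; type and label values are illustrative):

    from types import SimpleNamespace

    blocks = [
        SimpleNamespace(block_type="requirements", block_label="module_a"),
        SimpleNamespace(block_type="code", block_label="module_a"),
        SimpleNamespace(block_type="code", block_label="module_b"),
    ]

    input_types = {"code"}
    input_labels = {"module_a"}

    selected = [
        b
        for b in blocks
        if b.block_type in input_types and b.block_label in input_labels
    ]
    print([(b.block_type, b.block_label) for b in selected])  # [('code', 'module_a')]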
@@ -539,11 +680,51 @@ class Converter:
 
         else:
             log.error(
-                f"[{
+                f"[{output_block.name}] Translation failed\n"
                 f" Total cost: ${output_block.total_cost:,.2f}\n"
             )
         return output_block
 
+    def translate_file(
+        self,
+        file: Path,
+        failure_path: Path | None = None,
+    ) -> TranslatedCodeBlock:
+        """Translate a single file.
+
+        Arguments:
+            file: Input path to file
+            failure_path: path to directory to store failure summaries`
+
+        Returns:
+            A `TranslatedCodeBlock` object. This block does not have a path set, and its
+            code is not guaranteed to be consolidated. To amend this, run
+            `Combiner.combine_children` on the block.
+        """
+        input_block = self._split_file(file)
+        return self.translate_blocks(input_block, failure_path)
+
+    def translate_janus_file(self, file: Path, failure_path: Path | None = None):
+        filename = file.name
+        with open(file, "r") as f:
+            file_obj = json.load(f)
+        return self.translate_janus_obj(file_obj, filename, failure_path)
+
+    def translate_janus_obj(self, obj: Any, name: str, failure_path: Path | None = None):
+        block = self._janus_object_to_codeblock(obj, name)
+        return self.translate_blocks(block, failure_path)
+
+    def translate_text(self, text: str, name: str, failure_path: Path | None = None):
+        """
+        Translates given text
+        Arguments:
+            text: text to translate
+            name: the name of the text (filename if from a file)
+            failure_path: path to write failure file if translation is not successful
+        """
+        input_block = self._split_text(text, name)
+        return self.translate_blocks(input_block, failure_path)
+
     def _iterative_translate(
         self, root: CodeBlock, failure_path: Path | None = None
     ) -> TranslatedCodeBlock:
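These methods give a converter three entry points besides the directory-level `translate()`: a source file, a previously produced janus JSON file, or a raw string. A hedged sketch of how they might be called; the paths and source text are placeholders:

    from pathlib import Path

    # Each call returns a block collection per the signatures above.
    out1 = converter.translate_file(Path("src/legacy.f90"))
    out2 = converter.translate_janus_file(Path("out/legacy.json"))   # output of an earlier run
    out3 = converter.translate_text("PROGRAM HELLO\nEND PROGRAM", "hello.f90")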
@@ -556,7 +737,13 @@ class Converter:
         Returns:
             A `TranslatedCodeBlock`
         """
-        translated_root = TranslatedCodeBlock(
+        translated_root = TranslatedCodeBlock(
+            root,
+            self._target_language,
+            self,
+            block_type=self._output_type,
+            block_label=self._output_label,
+        )
         last_prog, prog_delta = 0, 0.1
         stack = [translated_root]
         try:
@@ -579,7 +766,7 @@ class Converter:
             except RateLimitError:
                 pass
             except OutputParserException as e:
-                log.error(f"Skipping file, failed to parse output: {e}
+                log.error(f"Skipping file, failed to parse output: {e}")
             except BadRequestError as e:
                 if str(e).startswith("Detected an error in the prompt"):
                     log.warning("Malformed input, skipping")
@@ -607,9 +794,10 @@ class Converter:
                 )
                 raise e
         finally:
-
-
+            out_obj = self._get_output_obj(
+                translated_root, self._combine_output, include_previous_outputs=True
             )
+            log.debug(f"Resulting Block:" f"{json.dumps(out_obj)}")
             if not translated_root.translated:
                 if failure_path is not None:
                     self._save_to_file(translated_root, failure_path)
@@ -666,6 +854,16 @@ class Converter:
 
         log.debug(f"[{block.name}] Output code:\n{block.text}")
 
+    def _split_text(self, text: str, name: str) -> CodeBlock:
+        log.info(f"[{name}] Splitting text")
+        root = self._splitter.split_string(text, name)
+        log.info(
+            f"[{name}] Text split into {root.n_descendents:,} blocks,"
+            f"tree of height {root.height}"
+        )
+        log.info(f"[{name}] Input CodeBlock Structure:\n{root.tree_str()}")
+        return root
+
     def _split_file(self, file: Path) -> CodeBlock:
         filename = file.name
         log.info(f"[{filename}] Splitting file")
@@ -680,31 +878,113 @@ class Converter:
     def _run_chain(self, block: TranslatedCodeBlock) -> str:
         return self.chain.invoke(block.original)
 
+    def _combine_metadata(self, metadatas: list[dict]):
+        return dict(
+            cost=sum(m["cost"] for m in metadatas),
+            processing_time=sum(m["processing_time"] for m in metadatas),
+            num_requests=sum(m["num_requests"] for m in metadatas),
+            input_tokens=sum(m["input_tokens"] for m in metadatas),
+            output_tokens=sum(m["output_tokens"] for m in metadatas),
+            converter_name=self.__class__.__name__,
+            type=[m["type"] for m in metadatas],
+            label=[m["label"] for m in metadatas],
+        )
+
+    def _combine_inputs(self, inputs: list[str]):
+        return json.dumps(inputs)
+
     def _get_output_obj(
-        self,
+        self,
+        block: TranslatedCodeBlock | BlockCollection | dict,
+        combine_children: bool = True,
+        include_previous_outputs: bool = True,
     ) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
-
-
-
-
+        block_type = None
+        block_label = None
+        if isinstance(block, dict):
+            # output object has already been generated
+            new_block = deepcopy(block)
+            if "intermediate_outputs" in new_block:
+                del new_block["intermediate_outputs"]
+            return new_block
+        if isinstance(block, BlockCollection):
+            if len(block.blocks) == 1:
+                outputs = self._get_output_obj(block.blocks[0], combine_children, False)[
+                    "outputs"
+                ]
+                block_type = block.blocks[0].block_type
+                block_label = block.blocks[0].block_label
+            else:
+                outputs = [
+                    self._get_output_obj(b, combine_children, False) for b in block.blocks
+                ]
+        elif (
+            not isinstance(block, BlockCollection)
+            and not combine_children
+            and len(block.children) > 0
+        ):
+            outputs = self._get_output_obj_children(block, False)
         else:
-
-
-
-
-
-
-
-
+            block_type = block.block_type
+            block_label = block.block_label
+            if not block.translation_completed:
+                # translation wasn't completed, so combined parsing will likely fail
+                outputs = [block.complete_text]
+            else:
+                output_str = self._parser.parse_combined_output(block.complete_text)
+                outputs = [output_str]
+
+        def _get_input(block):
+            if isinstance(block, BlockCollection):
+                return self._combine_inputs([_get_input(b) for b in block.blocks])
+            return block.original.text or ""
+
+        out = dict(
+            input=_get_input(block),
             metadata=dict(
                 cost=block.total_cost,
-                processing_time=block.
+                processing_time=block.total_processing_time,
                 num_requests=block.total_num_requests,
                 input_tokens=block.total_request_input_tokens,
                 output_tokens=block.total_request_output_tokens,
+                converter_name=self.__class__.__name__,
+                type=block_type,
+                label=block_label,
             ),
-
+            outputs=outputs,
         )
+        if (
+            include_previous_outputs
+            and isinstance(block, BlockCollection)
+            and len(block.previous_generations) > 0
+        ):
+            intermediate_outputs = []
+            for p in block.previous_generations:
+                if isinstance(p, dict):
+                    # preserve intermediate outputs from previous runs
+                    intermediate_outputs.append(
+                        self._get_output_obj(p, combine_children, False)
+                    )
+            if len(intermediate_outputs) > 0:
+                out["intermediate_outputs"] = intermediate_outputs
+        return out
+
+    def _get_output_obj_children(
+        self, block: TranslatedCodeBlock, include_previous_outputs: bool = True
+    ):
+        if len(block.children) > 0:
+            res = []
+            for c in block.children:
+                res += self._get_output_obj_children(c, include_previous_outputs)
+            return res
+        else:
+            return [
+                self._get_output_obj(
+                    block,
+                    combine_children=True,
+                    include_previous_outputs=include_previous_outputs,
+                )
+            ]
 
     def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
         """Save a file to disk.
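`_get_output_obj` is what `_save_to_file` serializes, so the on-disk janus JSON now carries the converter name, block type/label, and any `intermediate_outputs` from earlier converters in a chain. An illustrative (not exhaustive) shape of that object, assembled from the fields visible in the hunk above with made-up values:

    example_output_obj = {
        "input": "PROGRAM HELLO\n...\nEND PROGRAM",
        "metadata": {
            "cost": 0.0123,
            "processing_time": 4.2,
            "num_requests": 1,
            "input_tokens": 350,
            "output_tokens": 210,
            "converter_name": "Converter",   # self.__class__.__name__
            "type": "code",
            "label": None,
        },
        "outputs": ["def hello():\n    print('Hello')"],
        # present only when previous generations exist (e.g. from a chained run)
        "intermediate_outputs": [],
    }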
@@ -712,6 +992,103 @@ class Converter:
         Arguments:
             block: The `TranslatedCodeBlock` to save to a file.
         """
-        obj = self._get_output_obj(
+        obj = self._get_output_obj(
+            block, combine_children=self._combine_output, include_previous_outputs=True
+        )
         out_path.parent.mkdir(parents=True, exist_ok=True)
         out_path.write_text(json.dumps(obj, indent=2), encoding="utf-8")
+
+    def _janus_object_to_codeblock(self, janus_obj: dict, name: str):
+        results = []
+        for o in janus_obj["outputs"]:
+            metadata = janus_obj["metadata"]
+            if isinstance(o, str):
+                block_label = metadata["label"]
+                if isinstance(block_label, list):
+                    block_label = block_label[0]
+                block_type = metadata["type"]
+                if isinstance(block_type, list):
+                    block_type = block_type[0]
+                code_block = self._split_text(o, name)
+                code_block.previous_generations = janus_obj.get(
+                    "intermediate_outputs", []
+                ) + [janus_obj]
+                code_block.block_type = block_type
+                code_block.block_label = block_label
+                results.append(code_block)
+            else:
+                results += self._janus_object_to_codeblock(o, name).blocks
+        previous_generations = janus_obj.get("intermediate_outputs", [])
+        if janus_obj["metadata"]["converter_name"] != "ConverterChain":
+            previous_generations += [janus_obj]
+        return BlockCollection(results, previous_generations)
+
+    def __or__(self, other: "Converter"):
+        from janus.converter.chain import ConverterChain
+
+        return ConverterChain(self, other)
+
+    @property
+    def source_language(self):
+        return self._source_language
+
+    @property
+    def target_language(self):
+        return self._target_language
+
+    @property
+    def target_version(self):
+        return self._target_version
+
+    def set_target_language(
+        self, target_language: str, target_version: str | None
+    ) -> None:
+        """Validate and set the target language.
+
+        The affected objects will not be updated until translate() is called.
+
+        Arguments:
+            target_language: The target programming language.
+            target_version: The target version of the target programming language.
+        """
+        target_language = target_language.lower()
+        if target_language not in LANGUAGES:
+            raise ValueError(
+                f"Invalid target language: {target_language}. "
+                "Valid target languages are found in `janus.utils.enums.LANGUAGES`."
+            )
+        self._target_language = target_language
+        self._target_version = target_version
+        # Taking the first suffix as the default for output files
+        self._target_suffix = f".{LANGUAGES[target_language]['suffixes'][0]}"
+
+    @classmethod
+    def eval_obj(cls, target, metric_func, *args, **kwargs):
+        if "reference" in kwargs:
+            return cls.eval_obj_reference(target, metric_func, *args, **kwargs)
+        else:
+            return cls.eval_obj_noreference(target, metric_func, *args, **kwargs)
+
+    @classmethod
+    def eval_obj_noreference(cls, target, metric_func, *args, **kwargs):
+        results = []
+        for o in target["outputs"]:
+            if isinstance(o, dict):
+                results += cls.eval_obj_noreference(o, metric_func, *args, **kwargs)
+            else:
+                results.append(metric_func(o, *args, **kwargs))
+        return results
+
+    @classmethod
+    def eval_obj_reference(cls, target, metric_func, reference, *args, **kwargs):
+        results = []
+        for o, r in zip(target["outputs"], reference["outputs"]):
+            if isinstance(o, dict):
+                if not isinstance(r, dict):
+                    raise ValueError("Error: format of reference doesn't match target")
+                results += cls.eval_obj_reference(o, metric_func, r, *args, **kwargs)
+            else:
+                if isinstance(r, dict):
+                    raise ValueError("Error: format of reference doesn't match target")
+                results.append(metric_func(o, r, *args, **kwargs))
+        return results
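The `__or__` overload means two converters can be composed with `|` into a `ConverterChain` (defined in the new janus/converter/chain.py), and the `eval_obj*` classmethods walk a saved output object and apply a metric to every leaf output string. A hedged sketch of both; the converters, the chain's `translate()` call, the metric, and the paths are placeholders and assumptions, not documented API:

    import json
    from pathlib import Path

    # Composition: equivalent to ConverterChain(first_converter, second_converter).
    pipeline = first_converter | second_converter
    pipeline.translate("in_dir", "out_dir")   # assumes the chain exposes translate()

    # Evaluation: apply a simple metric (here, output length) to every output string.
    saved = json.loads(Path("out_dir/legacy.json").read_text())
    scores = Converter.eval_obj_noreference(saved, len)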