janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- janus/__init__.py +1 -1
- janus/__main__.py +1 -1
- janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
- janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
- janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
- janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
- janus/_tests/test_cli.py +3 -2
- janus/cli/aggregate.py +135 -0
- janus/cli/cli.py +117 -0
- janus/cli/constants.py +49 -0
- janus/cli/database.py +289 -0
- janus/cli/diagram.py +207 -0
- janus/cli/document.py +183 -0
- janus/cli/embedding.py +122 -0
- janus/cli/llm.py +191 -0
- janus/cli/partition.py +134 -0
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +147 -0
- janus/cli/translate.py +192 -0
- janus/converter/__init__.py +1 -1
- janus/converter/_tests/test_translate.py +7 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +444 -153
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +27 -16
- janus/converter/evaluate.py +143 -144
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +3 -59
- janus/embedding/collections.py +1 -1
- janus/language/alc/_tests/alc.asm +3779 -0
- janus/language/binary/_tests/hello.bin +0 -0
- janus/language/block.py +78 -14
- janus/language/file.py +1 -1
- janus/language/mumps/_tests/mumps.m +235 -0
- janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
- janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
- janus/language/treesitter/_tests/languages/matlab.m +225 -0
- janus/llm/models_info.py +9 -1
- janus/metrics/_tests/asm_test_file.asm +10 -0
- janus/metrics/_tests/mumps_test_file.m +6 -0
- janus/metrics/_tests/test_treesitter_metrics.py +1 -1
- janus/metrics/metric.py +47 -124
- janus/metrics/prompts/clarity.txt +8 -0
- janus/metrics/prompts/completeness.txt +16 -0
- janus/metrics/prompts/faithfulness.txt +10 -0
- janus/metrics/prompts/hallucination.txt +16 -0
- janus/metrics/prompts/quality.txt +8 -0
- janus/metrics/prompts/readability.txt +16 -0
- janus/metrics/prompts/usefulness.txt +16 -0
- janus/parsers/code_parser.py +4 -4
- janus/parsers/doc_parser.py +12 -9
- janus/parsers/parser.py +7 -0
- janus/parsers/partition_parser.py +6 -4
- janus/parsers/reqs_parser.py +11 -8
- janus/parsers/uml.py +5 -4
- janus/prompts/prompt.py +2 -2
- janus/prompts/templates/README.md +30 -0
- janus/prompts/templates/basic_aggregation/human.txt +6 -0
- janus/prompts/templates/basic_aggregation/system.txt +1 -0
- janus/prompts/templates/basic_refinement/human.txt +14 -0
- janus/prompts/templates/basic_refinement/system.txt +1 -0
- janus/prompts/templates/diagram/human.txt +9 -0
- janus/prompts/templates/diagram/system.txt +1 -0
- janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
- janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
- janus/prompts/templates/document/human.txt +10 -0
- janus/prompts/templates/document/system.txt +1 -0
- janus/prompts/templates/document_cloze/human.txt +11 -0
- janus/prompts/templates/document_cloze/system.txt +1 -0
- janus/prompts/templates/document_cloze/variables.json +4 -0
- janus/prompts/templates/document_cloze/variables_asm.json +4 -0
- janus/prompts/templates/document_inline/human.txt +13 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
- janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
- janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
- janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
- janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
- janus/prompts/templates/multidocument/human.txt +15 -0
- janus/prompts/templates/multidocument/system.txt +1 -0
- janus/prompts/templates/partition/human.txt +22 -0
- janus/prompts/templates/partition/system.txt +1 -0
- janus/prompts/templates/partition/variables.json +4 -0
- janus/prompts/templates/pseudocode/human.txt +7 -0
- janus/prompts/templates/pseudocode/system.txt +7 -0
- janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
- janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
- janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
- janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
- janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
- janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
- janus/prompts/templates/refinement/hallucination/human.txt +13 -0
- janus/prompts/templates/refinement/hallucination/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/human.txt +15 -0
- janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
- janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/system.txt +1 -0
- janus/prompts/templates/refinement/revision/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/revision/system.txt +1 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
- janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
- janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
- janus/prompts/templates/requirements/human.txt +13 -0
- janus/prompts/templates/requirements/system.txt +2 -0
- janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
- janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
- janus/prompts/templates/simple/human.txt +16 -0
- janus/prompts/templates/simple/system.txt +3 -0
- janus/refiners/format.py +49 -0
- janus/refiners/refiner.py +113 -4
- janus/utils/enums.py +127 -112
- janus/utils/logger.py +2 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
- janus_llm-4.4.5.dist-info/RECORD +210 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
- janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
- janus/cli.py +0 -1488
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- janus_llm-4.3.1.dist-info/RECORD +0 -115
- janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/llm/models_info.py
CHANGED
```diff
@@ -50,6 +50,7 @@ except ImportError:
 ModelType = TypeVar(
     "ModelType",
     AzureChatOpenAI,
+    ChatOpenAI,
     HuggingFaceTextGenInference,
     Bedrock,
     BedrockChat,
@@ -247,6 +248,7 @@ def load_model(model_id) -> JanusModel:
         token_limit = model_config["token_limit"]
         input_token_cost = model_config["model_cost"]["input"]
         output_token_cost = model_config["model_cost"]["output"]
+        input_token_proportion = model_config["input_token_proportion"]
 
     elif model_id in DEFAULT_MODELS:
         model_id = model_id
@@ -257,6 +259,7 @@ def load_model(model_id) -> JanusModel:
         token_limit = 0
         input_token_cost = 0.0
         output_token_cost = 0.0
+        input_token_proportion = 0.4
         if model_long_id in TOKEN_LIMITS:
             token_limit = TOKEN_LIMITS[model_long_id]
         if model_long_id in COST_PER_1K_TOKENS:
@@ -286,7 +289,6 @@ def load_model(model_id) -> JanusModel:
     elif model_type_name == "OpenAI":
         model_args.update(
             openai_api_key=str(os.getenv("OPENAI_API_KEY")),
-            openai_organization=str(os.getenv("OPENAI_ORG_ID")),
         )
         # log.warning("Do NOT use this model in sensitive environments!")
         # log.warning("If you would like to cancel, please press Ctrl+C.")
@@ -310,15 +312,20 @@ def load_model(model_id) -> JanusModel:
 
     class JanusModel(model_type):
         model_id: str
+        # model_name is for LangChain compatibility
+        # It searches for `self.model_name` when counting tokens
+        model_name: str
         short_model_id: str
         model_type_name: str
         token_limit: int
+        input_token_proportion: float
         input_token_cost: float
         output_token_cost: float
         prompt_engine: type[PromptEngine]
 
     model_args.update(
         model_id=MODEL_ID_TO_LONG_ID[model_id],
+        model_name=model_id,  # This is for LangChain compatibility
         short_model_id=model_id,
     )
@@ -327,6 +334,7 @@ def load_model(model_id) -> JanusModel:
         token_limit=token_limit,
         input_token_cost=input_token_cost,
         output_token_cost=output_token_cost,
+        input_token_proportion=input_token_proportion,
         prompt_engine=prompt_engine,
         **model_args,
     )
```
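The new `input_token_proportion` field caps how much of a model's context window janus will fill with input, defaulting to 0.4 for unconfigured models. A minimal sketch of the arithmetic this implies, using only values visible in the diff; the helper name is hypothetical, not from the package:

```python
# Hypothetical helper illustrating the budget implied by the new field,
# assuming only the token_limit field and the 0.4 default shown in the diff.
def input_budget(token_limit: int, input_token_proportion: float = 0.4) -> int:
    """Tokens reserved for input; the rest is left for the model's output."""
    return int(token_limit * input_token_proportion)

print(input_budget(8192))  # 3276 tokens of input for an 8192-token model
```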
janus/metrics/metric.py
CHANGED
```diff
@@ -1,12 +1,13 @@
 import inspect
 import json
-from pathlib import Path
 from typing import Callable, Optional
 
 import click
 import typer
 from typing_extensions import Annotated
 
+from janus.cli.constants import CONVERTERS
+from janus.converter.converter import Converter
 from janus.llm import load_model
 from janus.llm.model_callbacks import COST_PER_1K_TOKENS
 from janus.metrics.cli import evaluate
@@ -70,31 +71,6 @@ def metric(
             help="Reference file or string to use as reference/baseline.",
         ),
     ] = None,
-    json_file_name: Annotated[
-        Optional[str],
-        typer.Option(
-            "--json",
-            "-j",
-            help="Json file to extract pairs from \
-            (if set ignores --target and --reference)",
-        ),
-    ] = None,
-    target_key: Annotated[
-        str,
-        typer.Option(
-            "--target-key",
-            "-tk",
-            help="json key to extract list of target strings",
-        ),
-    ] = "target",
-    reference_key: Annotated[
-        str,
-        typer.Option(
-            "--reference-key",
-            "-rk",
-            help="json key to extract list of reference strings",
-        ),
-    ] = "reference",
     file_pairing_method: Annotated[
         str,
         typer.Option(
@@ -123,6 +99,14 @@ def metric(
             is_flag=True,
         ),
     ] = False,
+    use_janus_inputs: Annotated[
+        bool,
+        typer.Option(
+            "-j",
+            "--use-janus-inputs",
+            help="present if janus output files should be evaluated",
+        ),
+    ] = False,
     use_strings: Annotated[
         bool,
         typer.Option(
@@ -137,25 +121,23 @@
     ):
         out = []
         llm = load_model(llm_name)
-        if …
-        with open( …
-            …
-            pairs[model_key][k] = (model_dict[target_key][k], ref[k])
-        elif target is not None and reference is not None:
+        if use_janus_inputs:
+            with open(target, "r") as f:
+                target_obj = json.load(f)
+            with open(reference, "r") as f:
+                reference_obj = json.load(f)
+            converter_cls = CONVERTERS.get(
+                target_obj["metadata"].get("converter_name", "Converter"),
+                Converter,
+            )
+            out = converter_cls.eval_obj_reference(
+                target=target_obj,
+                reference=reference_obj,
+                metric_func=function,
+                *args,
+                **kwargs,
+            )
+        else:
             if use_strings:
                 target_contents = target
                 reference_contents = reference
@@ -175,25 +157,6 @@
                 token_limit=llm.token_limit,
                 model_cost=COST_PER_1K_TOKENS[llm.model_id],
             )
-        else:
-            raise ValueError(
-                "Error, specify json or target and reference files/strings"
-            )
-        if isinstance(pairs, dict):
-            out = {}
-            for k in pairs:
-                out[k] = apply_function_pairs(
-                    pairs[k],
-                    function,
-                    progress,
-                    language,
-                    llm,
-                    llm.token_limit,
-                    COST_PER_1K_TOKENS[llm.model_id],
-                    *args,
-                    **kwargs,
-                )
-        else:
         out = apply_function_pairs(
             pairs,
             function,
@@ -205,17 +168,15 @@
             *args,
             **kwargs,
         )
-        out_file = Path(out_file)
-        out_file.parent.mkdir(parents=True, exist_ok=True)
         with open(out_file, "w") as f:
+            log.info(f"Writing output to {out_file}")
             json.dump(out, f)
-        log.info(f"Saved results to file: {out_file}")
 
         sig1 = inspect.signature(function)
         sig2 = inspect.signature(func)
         func.__signature__ = sig2.replace(
             parameters=tuple(
-                list(sig2.parameters.values())[: …
+                list(sig2.parameters.values())[:9]
                 + list(sig1.parameters.values())[2:-1]
             )
         )
@@ -241,23 +202,14 @@
             "--target", "-t", help="Target file or string to evaluate."
         ),
     ] = None,
-    …
-    …
+    use_janus_inputs: Annotated[
+        bool,
         typer.Option(
-            "--json",
             "-j",
-            …
-            …
+            "--use-janus-inputs",
+            help="whether to use a janus output file as input",
         ),
-    ] = …
-    target_key: Annotated[
-        str,
-        typer.Option(
-            "--target-key",
-            "-tk",
-            help="json key to extract list of target strings",
-        ),
-    ] = "target",
+    ] = False,
     splitting_method: Annotated[
         str,
         typer.Option(
@@ -298,25 +250,17 @@
     **kwargs,
 ):
     llm = load_model(llm_name)
-    if …
-    with open( …
-        …
-        if target_key not in model_dict:
-            continue
-        if model_key not in strings:
-            strings[model_key] = {}
-        for k in model_dict[target_key]:
-            strings[model_key][k] = model_dict[target_key][k]
-        # strings += list(json_obj[key][target_key].values())
-    elif target is not None:
+    if use_janus_inputs:
+        with open(target, "r") as f:
+            target_obj = json.load(f)
+        converter_cls = CONVERTERS.get(
+            target_obj["metadata"].get("converter_name", "Converter"),
+            Converter,
+        )
+        out = converter_cls.eval_obj(
+            target=target_obj, metric_func=function, *args, **kwargs
+        )
+    else:
         if use_strings:
             target_contents = target
         else:
@@ -332,25 +276,6 @@
             token_limit=llm.token_limit,
             model_cost=COST_PER_1K_TOKENS[llm.model_id],
         )
-    else:
-        raise ValueError(
-            "Error: must specify either json file or target file/string"
-        )
-    if isinstance(strings, dict):
-        out = {}
-        for k in strings:
-            out[k] = apply_function_strings(
-                strings[k],
-                function,
-                progress,
-                language,
-                llm,
-                llm.token_limit,
-                COST_PER_1K_TOKENS[llm.model_id],
-                *args,
-                **kwargs,
-            )
-    else:
     out = apply_function_strings(
         strings,
         function,
@@ -362,17 +287,15 @@
         *args,
         **kwargs,
     )
-    out_file = Path(out_file)
-    out_file.parent.mkdir(parents=True, exist_ok=True)
     with open(out_file, "w") as f:
+        log.info(f"Writing output to {out_file}")
        json.dump(out, f)
-    log.info(f"Saved results to file: {out_file}")
 
     sig1 = inspect.signature(function)
     sig2 = inspect.signature(func)
     func.__signature__ = sig2.replace(
         parameters=tuple(
-            list(sig2.parameters.values())[: …
+            list(sig2.parameters.values())[:7]
            + list(sig1.parameters.values())[1:-1]
         )
     )
```
(Removed lines marked `…` were truncated in the source view and are left as gaps.)
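The removed `--json`/`--target-key`/`--reference-key` plumbing is replaced by a single boolean `-j`/`--use-janus-inputs` flag: instead of pulling string pairs out of an arbitrary JSON file, the metric now hands a janus output object to the converter class recorded in its metadata. A minimal sketch of that dispatch, assuming a janus output file with the `metadata.converter_name` layout the diff reads; the file name is illustrative:

```python
import json

from janus.cli.constants import CONVERTERS
from janus.converter.converter import Converter

# "translated.json" is an illustrative janus output file name.
with open("translated.json", "r") as f:
    target_obj = json.load(f)

# Unknown or missing converter names fall back to the base Converter class,
# exactly as in the diff above.
converter_cls = CONVERTERS.get(
    target_obj["metadata"].get("converter_name", "Converter"), Converter
)
```

This moves the pairing and extraction logic out of the metric decorator and into the converter classes themselves.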
janus/metrics/prompts/clarity.txt
ADDED
```diff
@@ -0,0 +1,8 @@
+Based on the following target written in the {language} programming language, how would you rate the code clarity of the target on a scale of integers from 1 to 10? Higher is better.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/completeness.txt
ADDED
```diff
@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+
+Rubric:
+Does the comment address all capabilities of the relevant source code?
+
+10 - All essential functionality is documented.
+6-9 - Most essential functionality is documented.
+2-5 - Little essential functionality is documented.
+1 - No essential functionality is documented.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/faithfulness.txt
ADDED
```diff
@@ -0,0 +1,10 @@
+Based on the following target and reference written in the {language} programming language, how would you rate the faithfulness of the target to the original reference on a scale of integers from 1 to 10? Higher is better.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+Reference: the reference code that we are judging the target against
+{format_instructions}
+
+Target: {target}
+Reference: {reference}
```
janus/metrics/prompts/hallucination.txt
ADDED
```diff
@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+
+Rubric:
+Does the comment provide true information?
+
+10 - The comment provides only true information.
+6-9 - The comment provides mostly true information.
+2-5 - The comment provides mostly untrue information.
+1 - The comment is completely untrue.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/quality.txt
ADDED
```diff
@@ -0,0 +1,8 @@
+Based on the following target written in the {language} programming language, how would you rate the code quality of the target on a scale of integers from 1 to 10? Higher is better.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/readability.txt
ADDED
```diff
@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+
+Rubric:
+Is the comment clear to read?
+
+10 - The comment is well-written.
+6-9 - The comment has few problems.
+2-5 - The comment has many problems.
+1 - The comment is unreadable.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/usefulness.txt
ADDED
```diff
@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+
+Rubric:
+Is the comment useful?
+
+10 - The comment helps an expert programmer understand the code better.
+6-9 - The comment helps an average programmer understand the code better.
+2-5 - The comment documents only trivial functionality.
+1 - The comment is not useful at any level.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
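All seven rubric prompts share the same placeholders: `{language}`, `{format_instructions}`, and `{target}` (faithfulness adds `{reference}`). A minimal sketch of how such a template could be rendered with LangChain's `PromptTemplate`; janus's actual prompt-engine wiring may differ, and the filled-in values are illustrative:

```python
from langchain_core.prompts import PromptTemplate

# The clarity prompt from above, reduced to its placeholder structure.
template = PromptTemplate.from_template(
    "Based on the following target written in the {language} programming "
    "language, how would you rate the code clarity of the target on a scale "
    "of integers from 1 to 10? Higher is better.\n\n"
    "{format_instructions}\n\nTarget: {target}"
)
prompt = template.format(
    language="python",
    # In janus this would come from an output parser's get_format_instructions().
    format_instructions="Respond with a single integer.",
    target="def add(a, b):\n    return a + b",
)
print(prompt)
```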
janus/parsers/code_parser.py
CHANGED
```diff
@@ -1,9 +1,8 @@
 import re
 
-from langchain_core.exceptions import OutputParserException
 from langchain_core.messages import BaseMessage
 
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -18,8 +17,9 @@ class CodeParser(JanusParser):
         pattern = rf"```[^\S\r\n]*(?:{self.language}[^\S\r\n]*)?\n?(.*?)\n*```"
         code = re.search(pattern, text, re.DOTALL)
         if code is None:
-            raise …
-                …
+            raise JanusParserException(
+                text,
+                "Code not find code between triple square brackets",
             )
         return str(code.group(1))
 
```
(Removed lines marked `…` were truncated in the source view and are left as gaps.)
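The fence-matching pattern shown in the context lines can be exercised on its own. A short sketch, with `self.language` replaced by a literal for illustration and a made-up model response:

```python
import re

language = "python"
# The exact pattern from the diff context above.
pattern = rf"```[^\S\r\n]*(?:{language}[^\S\r\n]*)?\n?(.*?)\n*```"

response = "Sure, here is the code:\n```python\nprint('hi')\n```"
match = re.search(pattern, response, re.DOTALL)
print(match.group(1))  # print('hi')
```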
janus/parsers/doc_parser.py
CHANGED
```diff
@@ -8,7 +8,7 @@ from langchain_core.messages import BaseMessage
 from langchain_core.pydantic_v1 import BaseModel, Field
 
 from janus.language.block import CodeBlock
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -86,7 +86,7 @@ class MultiDocumentationParser(JanusParser, PydanticOutputParser):
         return str(self.__class__.name)
 
 
-class MadlibsDocumentationParser(JanusParser):
+class ClozeDocumentationParser(JanusParser):
     expected_keys: set[str]
 
     def __init__(self):
@@ -107,11 +107,12 @@ class MadlibsDocumentationParser(JanusParser):
             obj = parse_json_markdown(text)
         except json.JSONDecodeError as e:
             log.debug(f"Invalid JSON object. Output:\n{text}")
-            raise …
+            raise JanusParserException(text, f"Got invalid JSON object. Error: {e}")
 
         if not isinstance(obj, dict):
-            raise …
-                …
+            raise JanusParserException(
+                text,
+                f"Got invalid return object. Expected a dictionary, but got {type(obj)}",
             )
 
         seen_keys = set(obj.keys())
@@ -122,9 +123,10 @@ class MadlibsDocumentationParser(JanusParser):
         if invalid_keys:
             log.debug(f"Invalid keys: {invalid_keys}")
             log.debug(f"Missing keys: {missing_keys}")
-            raise …
+            raise JanusParserException(
+                text,
                 f"Got invalid return object. Missing the following expected "
-                f"keys: {missing_keys}"
+                f"keys: {missing_keys}",
             )
 
         for key in invalid_keys:
@@ -132,9 +134,10 @@ class MadlibsDocumentationParser(JanusParser):
 
         for value in obj.values():
             if not isinstance(value, str):
-                raise …
+                raise JanusParserException(
+                    text,
                     f"Got invalid return object. Expected all string values,"
-                    f' but got type "{type(value)}"'
+                    f' but got type "{type(value)}"',
                 )
 
         return json.dumps(obj)
```
(Removed lines marked `…` were truncated in the source view and are left as gaps; the old class name is recovered from the hunk headers.)
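From the checks above, the renamed `ClozeDocumentationParser` (formerly `MadlibsDocumentationParser`) accepts a JSON object whose keys match its `expected_keys` and whose values are all strings. A sketch of a payload that would pass those checks; the key names are placeholders, not janus's real comment markers:

```python
import json

# Placeholder keys; in janus the expected keys come from the comment
# markers inserted into the source being documented.
obj = {"comment_1": "Initializes the record buffer.", "comment_2": "Flushes output."}

assert isinstance(obj, dict)
assert all(isinstance(v, str) for v in obj.values())
print(json.dumps(obj))
```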
janus/parsers/parser.py
CHANGED
```diff
@@ -1,4 +1,5 @@
 from langchain.schema.output_parser import BaseOutputParser
+from langchain_core.exceptions import OutputParserException
 from langchain_core.messages import BaseMessage
 from langchain_core.output_parsers import StrOutputParser
 
@@ -49,3 +50,9 @@ class GenericParser(JanusParser, StrOutputParser):
 
     def get_format_instructions(self) -> str:
         return "Output should be a string"
+
+
+class JanusParserException(OutputParserException):
+    def __init__(self, unparsed_output, *args, **kwargs):
+        self.unparsed_output = unparsed_output
+        super().__init__(*args, **kwargs)
```
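`JanusParserException` exists so that callers keep the raw LLM response after a parse failure, as the parser diffs above all switch to it. A minimal sketch of catching it, assuming the `CodeParser` constructor takes the `language` attribute shown in its diff; the surrounding wiring is illustrative:

```python
from janus.parsers.code_parser import CodeParser
from janus.parsers.parser import JanusParserException

parser = CodeParser(language="python")
try:
    code = parser.parse("The model forgot to emit a code fence.")
except JanusParserException as e:
    # The unparsed model output survives the failure, so a refiner can feed
    # it back into a reflection prompt instead of discarding the attempt.
    raw = e.unparsed_output
```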
janus/parsers/partition_parser.py
CHANGED
```diff
@@ -9,7 +9,7 @@ from langchain_core.messages import BaseMessage
 from langchain_core.pydantic_v1 import BaseModel, Field
 
 from janus.language.block import CodeBlock
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -97,6 +97,7 @@ class PartitionParser(JanusParser, PydanticOutputParser):
     def parse(self, text: str | BaseMessage) -> str:
         if isinstance(text, BaseMessage):
             text = str(text.content)
+        original_text = text
 
         # Strip everything outside the JSON object
         begin, end = text.find("["), text.rfind("]")
@@ -122,7 +123,7 @@ class PartitionParser(JanusParser, PydanticOutputParser):
                 + ", ".join(invalid_splits)
             )
             log.warning(err_msg)
-            raise …
+            raise JanusParserException(original_text, err_msg)
 
         # Map line IDs to indices (so they can be sorted and lines indexed)
         index_to_line_id = {0: "START", None: "END"}
@@ -160,9 +161,10 @@ class PartitionParser(JanusParser, PydanticOutputParser):
                 "Oversized chunks:\n"
                 + "\n#############\n".join(chunk for _, chunk, _ in data)
             )
-            raise …
+            raise JanusParserException(
+                original_text,
                 f"The following segments are too long and must be "
-                f"further subdivided:\n{problem_points}"
+                f"further subdivided:\n{problem_points}",
             )
 
         return "\n<JANUS_PARTITION>\n".join(chunks)
```
(Removed lines marked `…` were truncated in the source view and are left as gaps.)
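The parser joins accepted chunks with a `<JANUS_PARTITION>` sentinel line, so downstream consumers can split on the same token. The sentinel comes straight from the diff; the sample text is made up:

```python
partitioned = "first chunk\n<JANUS_PARTITION>\nsecond chunk"
chunks = partitioned.split("\n<JANUS_PARTITION>\n")
print(chunks)  # ['first chunk', 'second chunk']
```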
janus/parsers/reqs_parser.py
CHANGED
```diff
@@ -2,10 +2,9 @@ import json
 import re
 
 from langchain.output_parsers.json import parse_json_markdown
-from langchain_core.exceptions import OutputParserException
 from langchain_core.messages import BaseMessage
 
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -20,6 +19,7 @@ class RequirementsParser(JanusParser):
     def parse(self, text: str | BaseMessage) -> str:
         if isinstance(text, BaseMessage):
             text = str(text.content)
+        original_text = text
 
         # TODO: This is an incorrect implementation (lstrip and rstrip take character
         # lists and strip any instances of those characters, not the full str)
@@ -30,15 +30,18 @@ class RequirementsParser(JanusParser):
             obj = parse_json_markdown(text)
         except json.JSONDecodeError as e:
             log.debug(f"Invalid JSON object. Output:\n{text}")
-            raise …
+            raise JanusParserException(
+                original_text, f"Got invalid JSON object. Error: {e}"
+            )
 
         if not isinstance(obj, dict):
-            raise …
-                …
+            raise JanusParserException(
+                original_text,
+                f"Got invalid return object. Expected a dictionary, but got {type(obj)}",
             )
         return json.dumps(obj)
 
-    def parse_combined_output(self, text: str):
+    def parse_combined_output(self, text: str) -> str:
         """Parse the output text from the LLM when multiple inputs are combined.
 
         Arguments:
@@ -49,10 +52,10 @@ class RequirementsParser(JanusParser):
         """
         json_strings = re.findall(r"\{.*?\}", text)
         output_list = list()
-        for …
+        for _, json_string in enumerate(json_strings, 1):
             json_dict = json.loads(json_string)
             output_list.append(json_dict["requirements"])
-        return output_list
+        return json.dumps(output_list)
 
     def get_format_instructions(self) -> str:
         """Get the format instructions for the parser.
```
(Removed lines marked `…` were truncated in the source view and are left as gaps.)
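`parse_combined_output` now returns a JSON string rather than a Python list, keeping it consistent with `parse`. Its behavior, reproduced standalone with an illustrative input:

```python
import json
import re

# Two requirements objects embedded in surrounding LLM text.
text = '{"requirements": ["R1"]} then {"requirements": ["R2", "R3"]}'
output_list = [
    json.loads(s)["requirements"] for s in re.findall(r"\{.*?\}", text)
]
print(json.dumps(output_list))  # [["R1"], ["R2", "R3"]]
```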