PyPI - janus-llm - Versions diffs - 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl - Mend

janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl

Files changed (134) hide show

janus/__init__.py +1 -1
janus/__main__.py +1 -1
janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
janus/_tests/test_cli.py +3 -2
janus/cli/aggregate.py +135 -0
janus/cli/cli.py +111 -0
janus/cli/constants.py +43 -0
janus/cli/database.py +289 -0
janus/cli/diagram.py +178 -0
janus/cli/document.py +174 -0
janus/cli/embedding.py +122 -0
janus/cli/llm.py +187 -0
janus/cli/partition.py +125 -0
janus/cli/self_eval.py +149 -0
janus/cli/translate.py +183 -0
janus/converter/__init__.py +1 -1
janus/converter/_tests/test_translate.py +2 -0
janus/converter/converter.py +129 -92
janus/converter/document.py +21 -14
janus/converter/evaluate.py +237 -4
janus/converter/translate.py +3 -3
janus/embedding/collections.py +1 -1
janus/language/alc/_tests/alc.asm +3779 -0
janus/language/alc/_tests/test_alc.py +1 -1
janus/language/alc/alc.py +9 -4
janus/language/binary/_tests/hello.bin +0 -0
janus/language/block.py +47 -12
janus/language/file.py +1 -1
janus/language/mumps/_tests/mumps.m +235 -0
janus/language/splitter.py +31 -23
janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
janus/language/treesitter/_tests/languages/matlab.m +225 -0
janus/language/treesitter/treesitter.py +9 -1
janus/llm/models_info.py +26 -13
janus/metrics/_tests/asm_test_file.asm +10 -0
janus/metrics/_tests/mumps_test_file.m +6 -0
janus/metrics/_tests/test_treesitter_metrics.py +1 -1
janus/metrics/prompts/clarity.txt +8 -0
janus/metrics/prompts/completeness.txt +16 -0
janus/metrics/prompts/faithfulness.txt +10 -0
janus/metrics/prompts/hallucination.txt +16 -0
janus/metrics/prompts/quality.txt +8 -0
janus/metrics/prompts/readability.txt +16 -0
janus/metrics/prompts/usefulness.txt +16 -0
janus/parsers/code_parser.py +4 -4
janus/parsers/doc_parser.py +12 -9
janus/parsers/eval_parsers/incose_parser.py +134 -0
janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
janus/parsers/parser.py +7 -0
janus/parsers/partition_parser.py +47 -13
janus/parsers/reqs_parser.py +8 -5
janus/parsers/uml.py +5 -4
janus/prompts/prompt.py +2 -2
janus/prompts/templates/README.md +30 -0
janus/prompts/templates/basic_aggregation/human.txt +6 -0
janus/prompts/templates/basic_aggregation/system.txt +1 -0
janus/prompts/templates/basic_refinement/human.txt +14 -0
janus/prompts/templates/basic_refinement/system.txt +1 -0
janus/prompts/templates/diagram/human.txt +9 -0
janus/prompts/templates/diagram/system.txt +1 -0
janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
janus/prompts/templates/document/human.txt +10 -0
janus/prompts/templates/document/system.txt +1 -0
janus/prompts/templates/document_cloze/human.txt +11 -0
janus/prompts/templates/document_cloze/system.txt +1 -0
janus/prompts/templates/document_cloze/variables.json +4 -0
janus/prompts/templates/document_cloze/variables_asm.json +4 -0
janus/prompts/templates/document_inline/human.txt +13 -0
janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
janus/prompts/templates/multidocument/human.txt +15 -0
janus/prompts/templates/multidocument/system.txt +1 -0
janus/prompts/templates/partition/human.txt +22 -0
janus/prompts/templates/partition/system.txt +1 -0
janus/prompts/templates/partition/variables.json +4 -0
janus/prompts/templates/pseudocode/human.txt +7 -0
janus/prompts/templates/pseudocode/system.txt +7 -0
janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
janus/prompts/templates/refinement/hallucination/human.txt +13 -0
janus/prompts/templates/refinement/hallucination/system.txt +1 -0
janus/prompts/templates/refinement/reflection/human.txt +15 -0
janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
janus/prompts/templates/refinement/reflection/system.txt +1 -0
janus/prompts/templates/refinement/revision/human.txt +16 -0
janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
janus/prompts/templates/refinement/revision/system.txt +1 -0
janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
janus/prompts/templates/requirements/human.txt +13 -0
janus/prompts/templates/requirements/system.txt +2 -0
janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
janus/prompts/templates/simple/human.txt +16 -0
janus/prompts/templates/simple/system.txt +3 -0
janus/refiners/format.py +49 -0
janus/refiners/refiner.py +143 -4
janus/utils/enums.py +140 -111
janus/utils/logger.py +2 -0
{janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/METADATA +7 -7
janus_llm-4.3.5.dist-info/RECORD +210 -0
{janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/WHEEL +1 -1
janus_llm-4.3.5.dist-info/entry_points.txt +3 -0
janus/cli.py +0 -1343
janus_llm-4.2.0.dist-info/RECORD +0 -113
janus_llm-4.2.0.dist-info/entry_points.txt +0 -3
{janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/LICENSE +0 -0

janus/llm/models_info.py CHANGED Viewed

@@ -6,9 +6,13 @@ from typing import Callable, Protocol, TypeVar
 from dotenv import load_dotenv
 from langchain_community.llms import HuggingFaceTextGenInference
 from langchain_core.runnables import Runnable
-from langchain_openai import AzureChatOpenAI
+from langchain_openai import AzureChatOpenAI, ChatOpenAI
-from janus.llm.model_callbacks import COST_PER_1K_TOKENS, azure_model_reroutes
+from janus.llm.model_callbacks import (
+    COST_PER_1K_TOKENS,
+    azure_model_reroutes,
+    openai_model_reroutes,
+)
 from janus.prompts.prompt import (
     ChatGptPromptEngine,
     ClaudePromptEngine,
@@ -46,6 +50,7 @@ except ImportError:
 ModelType = TypeVar(
     "ModelType",
     AzureChatOpenAI,
+    ChatOpenAI,
     HuggingFaceTextGenInference,
     Bedrock,
     BedrockChat,
@@ -127,7 +132,7 @@ bedrock_models = [
 all_models = [*azure_models, *bedrock_models]
 MODEL_TYPE_CONSTRUCTORS: dict[str, ModelType] = {
-    # "OpenAI": ChatOpenAI,
+    "OpenAI": ChatOpenAI,
     "HuggingFace": HuggingFaceTextGenInference,
     "Azure": AzureChatOpenAI,
     "Bedrock": Bedrock,
@@ -137,7 +142,7 @@ MODEL_TYPE_CONSTRUCTORS: dict[str, ModelType] = {
 MODEL_PROMPT_ENGINES: dict[str, Callable[..., PromptEngine]] = {
-    # **{m: ChatGptPromptEngine for m in openai_models},
+    **{m: ChatGptPromptEngine for m in openai_models},
     **{m: ChatGptPromptEngine for m in azure_models},
     **{m: ClaudePromptEngine for m in claude_models},
     **{m: Llama2PromptEngine for m in llama2_models},
@@ -148,7 +153,7 @@ MODEL_PROMPT_ENGINES: dict[str, Callable[..., PromptEngine]] = {
 }
 MODEL_ID_TO_LONG_ID = {
-    # **{m: mr for m, mr in openai_model_reroutes.items()},
+    **{m: mr for m, mr in openai_model_reroutes.items()},
     **{m: mr for m, mr in azure_model_reroutes.items()},
     "bedrock-claude-v2": "anthropic.claude-v2",
     "bedrock-claude-instant-v1": "anthropic.claude-instant-v1",
@@ -181,7 +186,7 @@ DEFAULT_MODELS = list(MODEL_DEFAULT_ARGUMENTS.keys())
 MODEL_CONFIG_DIR = Path.home().expanduser() / ".janus" / "llm"
 MODEL_TYPES: dict[str, PromptEngine] = {
-    # **{m: "OpenAI" for m in openai_models},
+    **{m: "OpenAI" for m in openai_models},
     **{m: "Azure" for m in azure_models},
     **{m: "BedrockChat" for m in bedrock_models},
 }
@@ -243,6 +248,7 @@ def load_model(model_id) -> JanusModel:
         token_limit = model_config["token_limit"]
         input_token_cost = model_config["model_cost"]["input"]
         output_token_cost = model_config["model_cost"]["output"]
+        input_token_proportion = model_config["input_token_proportion"]
     elif model_id in DEFAULT_MODELS:
         model_id = model_id
@@ -253,6 +259,7 @@ def load_model(model_id) -> JanusModel:
         token_limit = 0
         input_token_cost = 0.0
         output_token_cost = 0.0
+        input_token_proportion = 0.4
         if model_long_id in TOKEN_LIMITS:
             token_limit = TOKEN_LIMITS[model_long_id]
         if model_long_id in COST_PER_1K_TOKENS:
@@ -282,22 +289,22 @@ def load_model(model_id) -> JanusModel:
     elif model_type_name == "OpenAI":
         model_args.update(
             openai_api_key=str(os.getenv("OPENAI_API_KEY")),
-            openai_organization=str(os.getenv("OPENAI_ORG_ID")),
         )
         # log.warning("Do NOT use this model in sensitive environments!")
         # log.warning("If you would like to cancel, please press Ctrl+C.")
         # log.warning("Waiting 10 seconds...")
         # Give enough time for the user to read the warnings and cancel
         # time.sleep(10)
-        raise DeprecationWarning("OpenAI models are no longer supported.")
+        # raise DeprecationWarning("OpenAI models are no longer supported.")
     elif model_type_name == "Azure":
         model_args.update(
-            {
-                "api_key": os.getenv("AZURE_OPENAI_API_KEY"),
-                "azure_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
-                "api_version": os.getenv("OPENAI_API_VERSION", "2024-02-01"),
-            }
+            api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+            api_version=os.getenv("OPENAI_API_VERSION", "2024-02-01"),
+            azure_deployment=model_id,
+            request_timeout=3600,
+            max_tokens=4096,
         )
     model_type = MODEL_TYPE_CONSTRUCTORS[model_type_name]
@@ -305,15 +312,20 @@ def load_model(model_id) -> JanusModel:
     class JanusModel(model_type):
         model_id: str
+        # model_name is for LangChain compatibility
+        # It searches for `self.model_name` when counting tokens
+        model_name: str
         short_model_id: str
         model_type_name: str
         token_limit: int
+        input_token_proportion: float
         input_token_cost: float
         output_token_cost: float
         prompt_engine: type[PromptEngine]
     model_args.update(
         model_id=MODEL_ID_TO_LONG_ID[model_id],
+        model_name=model_id,  # This is for LangChain compatibility
         short_model_id=model_id,
     )
@@ -322,6 +334,7 @@ def load_model(model_id) -> JanusModel:
         token_limit=token_limit,
         input_token_cost=input_token_cost,
         output_token_cost=output_token_cost,
+        input_token_proportion=input_token_proportion,
         prompt_engine=prompt_engine,
         **model_args,
     )

janus/metrics/_tests/asm_test_file.asm ADDED Viewed

@@ -0,0 +1,10 @@
+NAME     OPA   OPSA,OPSB
+         OPB   OPSC,OPSC REMARK
+NAME     OPC   OPSA,OPSB
+         OPD   OPSA,OPSB REMARK2
+         B     OPSA
+         OPD
+         B     OPSB
+NAME     OPC   OPSA,OPSB
+         OPC
+         OPC   OPSA,OPSB

janus/metrics/_tests/mumps_test_file.m ADDED Viewed

@@ -0,0 +1,6 @@
+FUNC(a, b) ; apples
+  set apples=8
+  write a,!
+  write a,!
+  if abc=70 set f=1
+  quit 0

janus/metrics/_tests/test_treesitter_metrics.py CHANGED Viewed

@@ -3,7 +3,7 @@ from pathlib import Path
 from typer.testing import CliRunner
-from janus.cli import app
+from janus.cli.cli import app
 from janus.metrics.complexity_metrics import (
     TreeSitterMetric,
     cyclomatic_complexity,

janus/metrics/prompts/clarity.txt ADDED Viewed

@@ -0,0 +1,8 @@
+Based on the following target written in the {language} programming language, how would you rate the code clarity of the target on a scale of integers from 1 to 10? Higher is better.
+Think through your answer before selecting a rating with the following format:
+Target: the target code
+{format_instructions}
+Target: {target}

janus/metrics/prompts/completeness.txt ADDED Viewed

@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+Rubric:
+Does the comment address all capabilities of the relevant source code?
+10 - All essential functionality is documented.
+6-9 - Most essential functionality is documented.
+2-5 - Little essential functionality is documented.
+1 - No essential functionality is documented.
+Think through your answer before selecting a rating with the following format:
+Target: the target code
+{format_instructions}
+Target: {target}

janus/metrics/prompts/faithfulness.txt ADDED Viewed

@@ -0,0 +1,10 @@
+Based on the following target and reference written in the {language} programming language, how would you rate the faithfulness of the target to the original reference on a scale of integers from 1 to 10? Higher is better.
+Think through your answer before selecting a rating with the following format:
+Target: the target code
+Reference: the reference code that we are judging the target against
+{format_instructions}
+Target: {target}
+Reference: {reference}

janus/metrics/prompts/hallucination.txt ADDED Viewed

@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+Rubric:
+Does the comment provide true information?
+10 - The comment provides only true information.
+6-9 - The comment provides mostly true information.
+2-5 - The comment provides mostly untrue information.
+1 - The comment is completely untrue.
+Think through your answer before selecting a rating with the following format:
+Target: the target code
+{format_instructions}
+Target: {target}

janus/metrics/prompts/quality.txt ADDED Viewed

@@ -0,0 +1,8 @@
+Based on the following target written in the {language} programming language, how would you rate the code quality of the target on a scale of integers from 1 to 10? Higher is better.
+Think through your answer before selecting a rating with the following format:
+Target: the target code
+{format_instructions}
+Target: {target}

janus/metrics/prompts/readability.txt ADDED Viewed

@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+Rubric:
+Is the comment clear to read?
+10 - The comment is well-written.
+6-9 - The comment has few problems.
+2-5 - The comment has many problems.
+1 - The comment is unreadable.
+Think through your answer before selecting a rating with the following format:
+Target: the target code
+{format_instructions}
+Target: {target}

janus/metrics/prompts/usefulness.txt ADDED Viewed

@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+Rubric:
+Is the comment useful?
+10 - The comment helps an expert programmer understand the code better.
+6-9 - The comment helps an average programmer understand the code better.
+2-5 - The comment documents only trivial functionality.
+1 - The comment is not useful at any level.
+Think through your answer before selecting a rating with the following format:
+Target: the target code
+{format_instructions}
+Target: {target}

janus/parsers/code_parser.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import re
-from langchain_core.exceptions import OutputParserException
 from langchain_core.messages import BaseMessage
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 log = create_logger(__name__)
@@ -18,8 +17,9 @@ class CodeParser(JanusParser):
         pattern = rf"```[^\S\r\n]*(?:{self.language}[^\S\r\n]*)?\n?(.*?)\n*```"
         code = re.search(pattern, text, re.DOTALL)
         if code is None:
-            raise OutputParserException(
-                "Code not find code between triple square brackets"
+            raise JanusParserException(
+                text,
+                "Code not find code between triple square brackets",
             )
         return str(code.group(1))

janus/parsers/doc_parser.py CHANGED Viewed

@@ -8,7 +8,7 @@ from langchain_core.messages import BaseMessage
 from langchain_core.pydantic_v1 import BaseModel, Field
 from janus.language.block import CodeBlock
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 log = create_logger(__name__)
@@ -86,7 +86,7 @@ class MultiDocumentationParser(JanusParser, PydanticOutputParser):
         return str(self.__class__.name)
-class MadlibsDocumentationParser(JanusParser):
+class ClozeDocumentationParser(JanusParser):
     expected_keys: set[str]
     def __init__(self):
@@ -107,11 +107,12 @@ class MadlibsDocumentationParser(JanusParser):
             obj = parse_json_markdown(text)
         except json.JSONDecodeError as e:
             log.debug(f"Invalid JSON object. Output:\n{text}")
-            raise OutputParserException(f"Got invalid JSON object. Error: {e}")
+            raise JanusParserException(text, f"Got invalid JSON object. Error: {e}")
         if not isinstance(obj, dict):
-            raise OutputParserException(
-                f"Got invalid return object. Expected a dictionary, but got {type(obj)}"
+            raise JanusParserException(
+                text,
+                f"Got invalid return object. Expected a dictionary, but got {type(obj)}",
             )
         seen_keys = set(obj.keys())
@@ -122,9 +123,10 @@ class MadlibsDocumentationParser(JanusParser):
             if invalid_keys:
                 log.debug(f"Invalid keys: {invalid_keys}")
             log.debug(f"Missing keys: {missing_keys}")
-            raise OutputParserException(
+            raise JanusParserException(
+                text,
                 f"Got invalid return object. Missing the following expected "
-                f"keys: {missing_keys}"
+                f"keys: {missing_keys}",
             )
         for key in invalid_keys:
@@ -132,9 +134,10 @@ class MadlibsDocumentationParser(JanusParser):
         for value in obj.values():
             if not isinstance(value, str):
-                raise OutputParserException(
+                raise JanusParserException(
+                    text,
                     f"Got invalid return object. Expected all string values,"
-                    f' but got type "{type(value)}"'
+                    f' but got type "{type(value)}"',
                 )
         return json.dumps(obj)

janus/parsers/eval_parsers/incose_parser.py ADDED Viewed

@@ -0,0 +1,134 @@
+import json
+import random
+import uuid
+from typing import List
+from langchain.output_parsers import PydanticOutputParser
+from langchain_core.exceptions import OutputParserException
+from langchain_core.messages import BaseMessage
+from langchain_core.pydantic_v1 import BaseModel, Field, validator
+from janus.language.block import CodeBlock
+from janus.parsers.parser import JanusParser
+from janus.utils.logger import create_logger
+log = create_logger(__name__)
+RNG = random.Random()
+class Criteria(BaseModel):
+    reasoning: str = Field(description="A short explanation for the given assessment")
+    score: str = Field("A simple `pass` or `fail`")
+    @validator("score")
+    def score_is_valid(cls, v: str):
+        v = v.lower().strip()
+        if v not in {"pass", "fail"}:
+            raise OutputParserException("Score must be either 'pass' or 'fail'")
+        return v
+class Requirement(BaseModel):
+    requirement_id: str = Field(description="The 8-character comment ID")
+    requirement: str = Field(description="The original requirement being evaluated")
+    C1: Criteria
+    C2: Criteria
+    C3: Criteria
+    C4: Criteria
+    C5: Criteria
+    C6: Criteria
+    C7: Criteria
+    C8: Criteria
+    C9: Criteria
+class RequirementList(BaseModel):
+    __root__: List[Requirement] = Field(
+        description=(
+            "A list of requirement evaluations. Each element should include"
+            " the requirement's 8-character ID in the `requirement_id` field,"
+            " the original requirement in the 'requirement' field, "
+            " and nine score objects corresponding to each criterion."
+        )
+    )
+class IncoseParser(JanusParser, PydanticOutputParser):
+    requirements: dict[str, str]
+    def __init__(self):
+        PydanticOutputParser.__init__(
+            self,
+            pydantic_object=RequirementList,
+            requirements={},
+        )
+    def parse_input(self, block: CodeBlock) -> str:
+        # TODO: Perform comment stripping/placeholding here rather than in script
+        text = super().parse_input(block)
+        RNG.seed(text)
+        obj = json.loads(text)
+        # For some reason requirements objects are in a double list?
+        reqs = obj["requirements"]
+        # Generate a unique ID for each requirement (ensure they are unique)
+        req_ids = set()
+        while len(req_ids) < len(reqs):
+            req_ids.add(str(uuid.UUID(int=RNG.getrandbits(128), version=4))[:8])
+        self.requirements = dict(zip(req_ids, reqs))
+        reqs_str = "\n\n".join(
+            f"Requirement {rid} : {req}" for rid, req in self.requirements.items()
+        )
+        obj["requirements"] = reqs_str
+        return json.dumps(obj)
+    def parse(self, text: str | BaseMessage) -> str:
+        if isinstance(text, BaseMessage):
+            text = str(text.content)
+        # Strip everything outside the JSON object
+        begin, end = text.find("["), text.rfind("]")
+        text = text[begin : end + 1]
+        try:
+            out: RequirementList = super().parse(text)
+        except json.JSONDecodeError as e:
+            log.debug(f"Invalid JSON object. Output:\n{text}")
+            raise OutputParserException(f"Got invalid JSON object. Error: {e}")
+        evals: dict[str, dict] = {c.requirement_id: c.dict() for c in out.__root__}
+        seen_keys = set(evals.keys())
+        expected_keys = set(self.requirements.keys())
+        missing_keys = expected_keys.difference(seen_keys)
+        invalid_keys = seen_keys.difference(expected_keys)
+        if missing_keys:
+            log.debug(f"Missing keys: {missing_keys}")
+            if invalid_keys:
+                log.debug(f"Invalid keys: {invalid_keys}")
+            log.debug(f"Missing keys: {missing_keys}")
+            raise OutputParserException(
+                f"Got invalid return object. Missing the following expected "
+                f"keys: {missing_keys}"
+            )
+        for key in invalid_keys:
+            del evals[key]
+        for rid in evals.keys():
+            evals[rid]["requirement"] = self.requirements[rid]
+            evals[rid].pop("requirement_id")
+        return json.dumps(evals)
+    def parse_combined_output(self, text: str) -> str:
+        if not text.strip():
+            return str({})
+        objs = [json.loads(line.strip()) for line in text.split("\n") if line.strip()]
+        output_obj = {}
+        for obj in objs:
+            output_obj.update(obj)
+        return json.dumps(output_obj)

janus/parsers/eval_parsers/inline_comment_parser.py ADDED Viewed

@@ -0,0 +1,112 @@
+import json
+import re
+from typing import Any
+from langchain.output_parsers import PydanticOutputParser
+from langchain_core.exceptions import OutputParserException
+from langchain_core.messages import BaseMessage
+from langchain_core.pydantic_v1 import BaseModel, Field, conint
+from janus.language.block import CodeBlock
+from janus.parsers.parser import JanusParser
+from janus.utils.logger import create_logger
+log = create_logger(__name__)
+class Criteria(BaseModel):
+    reasoning: str = Field(description="A short explanation for the given score")
+    # Constrained to an integer between 1 and 4
+    score: conint(ge=1, le=4) = Field(  # type: ignore
+        description="An integer score between 1 and 4 (inclusive), 4 being the best"
+    )
+class Comment(BaseModel):
+    comment_id: str = Field(description="The 8-character comment ID")
+    completeness: Criteria = Field(description="The completeness of the comment")
+    hallucination: Criteria = Field(description="The factualness of the comment")
+    readability: Criteria = Field(description="The readability of the comment")
+    usefulness: Criteria = Field(description="The usefulness of the comment")
+class CommentList(BaseModel):
+    __root__: list[Comment] = Field(
+        description=(
+            "A list of inline comment evaluations. Each element should include"
+            " the comment's 8-character ID in the `comment_id` field, and four"
+            " score objects corresponding to each metric (`completeness`,"
+            " `hallucination`, `readability`, and `usefulness`)."
+        )
+    )
+class InlineCommentParser(JanusParser, PydanticOutputParser):
+    comments: dict[str, str]
+    def __init__(self):
+        PydanticOutputParser.__init__(
+            self,
+            pydantic_object=CommentList,
+            comments=[],
+        )
+    def parse_input(self, block: CodeBlock) -> str:
+        # TODO: Perform comment stripping/placeholding here rather than in script
+        text = super().parse_input(block)
+        self.comments = dict(
+            re.findall(
+                r"<(?:BLOCK|INLINE)_COMMENT (\w{8})> (.*)$",
+                text,
+                flags=re.MULTILINE,
+            )
+        )
+        return text
+    def parse(self, text: str | BaseMessage) -> str:
+        if isinstance(text, BaseMessage):
+            text = str(text.content)
+        # Strip everything outside the JSON object
+        begin, end = text.find("["), text.rfind("]")
+        text = text[begin : end + 1]
+        try:
+            out: CommentList = super().parse(text)
+        except json.JSONDecodeError as e:
+            log.debug(f"Invalid JSON object. Output:\n{text}")
+            raise OutputParserException(f"Got invalid JSON object. Error: {e}")
+        evals: dict[str, Any] = {c.comment_id: c.dict() for c in out.__root__}
+        seen_keys = set(evals.keys())
+        expected_keys = set(self.comments.keys())
+        missing_keys = expected_keys.difference(seen_keys)
+        invalid_keys = seen_keys.difference(expected_keys)
+        if missing_keys:
+            log.debug(f"Missing keys: {missing_keys}")
+            if invalid_keys:
+                log.debug(f"Invalid keys: {invalid_keys}")
+            log.debug(f"Missing keys: {missing_keys}")
+            raise OutputParserException(
+                f"Got invalid return object. Missing the following expected "
+                f"keys: {missing_keys}"
+            )
+        for key in invalid_keys:
+            del evals[key]
+        for cid in evals.keys():
+            evals[cid]["comment"] = self.comments[cid]
+            evals[cid].pop("comment_id")
+        return json.dumps(evals)
+    def parse_combined_output(self, text: str) -> str:
+        if not text.strip():
+            return str({})
+        objs = [json.loads(line.strip()) for line in text.split("\n") if line.strip()]
+        output_obj = {}
+        for obj in objs:
+            output_obj.update(obj)
+        return json.dumps(output_obj)

janus/parsers/parser.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from langchain.schema.output_parser import BaseOutputParser
+from langchain_core.exceptions import OutputParserException
 from langchain_core.messages import BaseMessage
 from langchain_core.output_parsers import StrOutputParser
@@ -49,3 +50,9 @@ class GenericParser(JanusParser, StrOutputParser):
     def get_format_instructions(self) -> str:
         return "Output should be a string"
+class JanusParserException(OutputParserException):
+    def __init__(self, unparsed_output, *args, **kwargs):
+        self.unparsed_output = unparsed_output
+        super().__init__(*args, **kwargs)

janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl

janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl