janus-llm 4.4.5__py3-none-any.whl → 4.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/cli/pipeline.py +6 -3
- janus/cli/self_eval.py +9 -0
- janus/converter/__init__.py +2 -0
- janus/converter/_tests/test_translate.py +1 -0
- janus/converter/chain.py +53 -133
- janus/converter/converter.py +199 -77
- janus/converter/diagram.py +5 -3
- janus/converter/document.py +10 -4
- janus/converter/evaluate.py +148 -113
- janus/converter/partition.py +4 -1
- janus/converter/passthrough.py +29 -0
- janus/converter/pool.py +74 -0
- janus/converter/requirements.py +4 -1
- janus/language/_tests/test_combine.py +1 -0
- janus/language/block.py +84 -3
- janus/llm/model_callbacks.py +6 -0
- janus/llm/models_info.py +19 -0
- janus/metrics/_tests/test_reading.py +48 -4
- janus/metrics/_tests/test_rouge_score.py +5 -11
- janus/metrics/reading.py +48 -28
- janus/metrics/rouge_score.py +21 -34
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/eval_parsers/incose_parser.py +3 -3
- janus/prompts/templates/cyclic/human.txt +16 -0
- janus/prompts/templates/cyclic/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +1 -1
- janus/prompts/templates/extract_variables/human.txt +5 -0
- janus/prompts/templates/extract_variables/system.txt +1 -0
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA +3 -4
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/RECORD +35 -29
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/WHEEL +1 -1
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/LICENSE +0 -0
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/entry_points.txt +0 -0
janus/llm/models_info.py
CHANGED
@@ -96,12 +96,16 @@ claude_models = [
     "bedrock-claude-haiku",
     "bedrock-claude-sonnet",
     "bedrock-claude-sonnet-3.5",
+    "bedrock-claude-sonnet-3.5-v2",
 ]
 llama2_models = [
     "bedrock-llama2-70b",
     "bedrock-llama2-70b-chat",
     "bedrock-llama2-13b",
     "bedrock-llama2-13b-chat",
+    "bedrock-llama3-8b-instruct",
+    "bedrock-llama3-70b-instruct",
+    "bedrock-llama3-3-70b-instruct",
 ]
 llama3_models = [
     "bedrock-llama3-8b-instruct",
@@ -113,6 +117,11 @@ titan_models = [
     "bedrock-jurassic-2-mid",
     "bedrock-jurassic-2-ultra",
 ]
+nova_models = [
+    "bedrock-nova-lite",
+    "bedrock-nova-micro",
+    "bedrock-nova-pro",
+]
 cohere_models = [
     "bedrock-command-r-plus",
 ]
@@ -160,12 +169,17 @@ MODEL_ID_TO_LONG_ID = {
     "bedrock-claude-haiku": "anthropic.claude-3-haiku-20240307-v1:0",
     "bedrock-claude-sonnet": "anthropic.claude-3-sonnet-20240229-v1:0",
     "bedrock-claude-sonnet-3.5": "anthropic.claude-3-5-sonnet-20240620-v1:0",
+    "bedrock-claude-sonnet-3.5-v2": "anthropic.claude-3-5-sonnet-20241022-v2:0",
     "bedrock-llama2-70b": "meta.llama2-70b-v1",
     "bedrock-llama2-70b-chat": "meta.llama2-70b-chat-v1",
     "bedrock-llama2-13b": "meta.llama2-13b-chat-v1",
     "bedrock-llama2-13b-chat": "meta.llama2-13b-v1",
     "bedrock-llama3-8b-instruct": "meta.llama3-8b-instruct-v1:0",
     "bedrock-llama3-70b-instruct": "meta.llama3-70b-instruct-v1:0",
+    "bedrock-llama3-3-70b-instruct": "meta.llama3-3-70b-instruct-v1:0",
+    "bedrock-nova-lite": "amazon.nova-lite-v1:0",
+    "bedrock-nova-micro": "amazon.nova-micro-v1:0",
+    "bedrock-nova-pro": "amazon.nova-pro-v1:0",
     "bedrock-titan-text-lite": "amazon.titan-text-lite-v1",
     "bedrock-titan-text-express": "amazon.titan-text-express-v1",
     "bedrock-jurassic-2-mid": "ai21.j2-mid-v1",
@@ -208,12 +222,17 @@ TOKEN_LIMITS: dict[str, int] = {
     "anthropic.claude-3-haiku-20240307-v1:0": 248_000,
     "anthropic.claude-3-sonnet-20240229-v1:0": 248_000,
     "anthropic.claude-3-5-sonnet-20240620-v1:0": 200_000,
+    "anthropic.claude-3-5-sonnet-20241022-v2:0": 200_000,
     "meta.llama2-70b-v1": 4096,
     "meta.llama2-70b-chat-v1": 4096,
     "meta.llama2-13b-chat-v1": 4096,
     "meta.llama2-13b-v1": 4096,
     "meta.llama3-8b-instruct-v1:0": 8000,
     "meta.llama3-70b-instruct-v1:0": 8000,
+    "meta.llama3-3-70b-instruct-v1:0": 128_000,
+    "amazon.nova-lite-v1:0": 300_000,
+    "amazon.nova-micro-v1:0": 128_000,
+    "amazon.nova-pro-v1:0": 300_000,
     "amazon.titan-text-lite-v1": 4096,
     "amazon.titan-text-express-v1": 8192,
     "ai21.j2-mid-v1": 8192,
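In practice, the new Bedrock aliases plug into the same two lookup tables as the existing models: `MODEL_ID_TO_LONG_ID` resolves the short janus alias to the provider's model ID, and `TOKEN_LIMITS` records the context window for that ID. A minimal sketch of that lookup, using only the dictionaries shown above (the `context_window` helper is illustrative, not part of the package):

```python
from janus.llm.models_info import MODEL_ID_TO_LONG_ID, TOKEN_LIMITS


def context_window(alias: str) -> int:
    """Illustrative helper: short alias -> Bedrock model ID -> token limit."""
    long_id = MODEL_ID_TO_LONG_ID[alias]  # e.g. "amazon.nova-pro-v1:0"
    return TOKEN_LIMITS[long_id]


print(context_window("bedrock-claude-sonnet-3.5-v2"))  # 200000
print(context_window("bedrock-nova-pro"))              # 300000
```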
janus/metrics/_tests/test_reading.py
CHANGED
@@ -1,11 +1,25 @@
 import unittest
 
-from janus.metrics.reading import
+from janus.metrics.reading import (
+    _repeat_text,
+    automated_readability,
+    coleman_liau,
+    dale_chall,
+    flesch,
+    flesch_grade,
+    gunning_fog,
+    word_count,
+)
 
 
 class TestReading(unittest.TestCase):
     def setUp(self):
-        self.text = "This is a sample text for testing readability metrics"
+        self.text = "This is a sample text for testing readability metrics."
+
+    def test_word_count(self):
+        """Test the word_count function."""
+        count = word_count(self.text)
+        self.assertEqual(count, 9)
 
     def test_repeat_text(self):
         """Test the _repeat_text function."""
@@ -16,12 +30,42 @@ class TestReading(unittest.TestCase):
     def test_flesch(self):
         """Test the Flesch readability score."""
         score = flesch(self.text)
-        self.assertAlmostEqual(score,
+        self.assertAlmostEqual(score, 45.42, places=2)
+
+    def test_flesch_grade(self):
+        """Test the Flesch Grade Level readability score."""
+        score = flesch_grade(self.text)
+        self.assertAlmostEqual(score, 9.2, places=2)
 
     def test_gunning_fog(self):
         """Test the Gunning-Fog readability score."""
         score = gunning_fog(self.text)
-        self.assertAlmostEqual(score,
+        self.assertAlmostEqual(score, 3.97, places=2)
+
+    def test_dale_chall(self):
+        """Test the Dale-Chall readability score."""
+        score = dale_chall(self.text)
+        self.assertAlmostEqual(score, 4.67, places=2)
+
+    def test_automated_readability(self):
+        """Test the Automated Readability Index score."""
+        score = automated_readability(self.text)
+        self.assertAlmostEqual(score, 7.1, places=2)
+
+    def test_coleman_liau(self):
+        """Test the Coleman-Liau Index."""
+        score = coleman_liau(self.text)
+        self.assertAlmostEqual(score, 9.94, places=2)
+
+    def test_blank_target(self):
+        """Test that blank targets return None for all metric functions."""
+        blank = " "  # blank string with whitespaces
+        self.assertIsNone(flesch(blank))
+        self.assertIsNone(flesch_grade(blank))
+        self.assertIsNone(gunning_fog(blank))
+        self.assertIsNone(dale_chall(blank))
+        self.assertIsNone(automated_readability(blank))
+        self.assertIsNone(coleman_liau(blank))
 
 
 if __name__ == "__main__":
janus/metrics/_tests/test_rouge_score.py
CHANGED
@@ -12,19 +12,13 @@ class TestRouge(unittest.TestCase):
         score = rouge(
             self.target, self.reference, granularity="n", n_gram=2, score_type="f"
         )
-        self.
+        self.assertEqual(score, 0.5)
 
     def test_rouge_with_granularity_l(self):
         score = rouge(
             self.target, self.reference, granularity="l", n_gram=2, score_type="f"
         )
-        self.
-
-    def test_rouge_with_granularity_w(self):
-        score = rouge(
-            self.target, self.reference, granularity="w", n_gram=2, score_type="f"
-        )
-        self.assertIsInstance(score, float)
+        self.assertAlmostEqual(score, 0.8, places=2)
 
     def test_rouge_with_invalid_granularity(self):
         with self.assertRaises(ValueError):
@@ -40,19 +34,19 @@ class TestRouge(unittest.TestCase):
         score = rouge(
             self.target, self.reference, granularity="n", n_gram=2, score_type="f"
         )
-        self.
+        self.assertAlmostEqual(score, 0.5, places=2)
 
     def test_rouge_with_score_type_p(self):
         score = rouge(
             self.target, self.reference, granularity="n", n_gram=2, score_type="p"
        )
-        self.
+        self.assertAlmostEqual(score, 0.5, places=2)
 
     def test_rouge_with_score_type_r(self):
         score = rouge(
             self.target, self.reference, granularity="n", n_gram=2, score_type="r"
         )
-        self.
+        self.assertAlmostEqual(score, 0.5, places=2)
 
     def test_rouge_with_invalid_score_type(self):
         with self.assertRaises(ValueError):
janus/metrics/reading.py
CHANGED
@@ -1,8 +1,7 @@
 import re
 
-import nltk
-import readability
 from nltk.tokenize import TweetTokenizer
+from textstat import textstat
 
 from janus.metrics.metric import metric
 
@@ -48,23 +47,9 @@ def _repeat_text(text):
     return repeated_text
 
 
-def get_readability(target: str) -> readability.Readability:
-    """Create a Readability object from an input string
-
-    Arguments:
-        target: The target text.
-
-    Returns:
-        py-readability-metrics Readability object for that text
-    """
-    nltk.download("punkt", quiet=True)
-    target = _repeat_text(target)
-    return readability.Readability(target)
-
-
 @metric(use_reference=False, help="The Flesch Readability score")
 def flesch(target: str, **kwargs) -> float:
-    """Calculate the Flesch Score using
+    """Calculate the Flesch Score using textstat.
 
     Arguments:
         target: The target text.
@@ -74,12 +59,13 @@ def flesch(target: str, **kwargs) -> float:
     """
     if not target.strip():  # Check if the target text is blank
         return None
-
+    target = _repeat_text(target)
+    return textstat.flesch_reading_ease(target)
 
 
 @metric(use_reference=False, help="The Flesch Grade Level Readability score")
 def flesch_grade(target: str, **kwargs) -> float:
-    """Calculate the Flesch Score using
+    """Calculate the Flesch Score using textstat.
 
     Arguments:
         target: The target text.
@@ -89,12 +75,13 @@ def flesch_grade(target: str, **kwargs) -> float:
     """
     if not target.strip():  # Check if the target text is blank
         return None
-
+    target = _repeat_text(target)
+    return textstat.flesch_kincaid_grade(target)
 
 
 @metric(use_reference=False, help="The Gunning-Fog Readability score")
 def gunning_fog(target: str, **kwargs) -> float:
-    """Calculate the Gunning-Fog Score using
+    """Calculate the Gunning-Fog Score using textstat.
 
     Arguments:
         target: The target text.
@@ -104,20 +91,53 @@ def gunning_fog(target: str, **kwargs) -> float:
     """
     if not target.strip():  # Check if the target text is blank
         return None
-
+    target = _repeat_text(target)
+    return textstat.gunning_fog(target)
 
 
-@metric(use_reference=False, help="The
-def
-    """Calculate the
+@metric(use_reference=False, help="The Dale-Chall Readability score")
+def dale_chall(target: str, **kwargs) -> float:
+    """Calculate the Dale-Chall Readability Score using textstat.
 
     Arguments:
         target: The target text.
 
     Returns:
-        The
+        The Dale-Chall score.
     """
     if not target.strip():  # Check if the target text is blank
         return None
-
-    return
+    target = _repeat_text(target)
+    return textstat.dale_chall_readability_score_v2(target)
+
+
+@metric(use_reference=False, help="The Automated Readability Index")
+def automated_readability(target: str, **kwargs) -> float:
+    """Calculate the Automated Readability Index using textstat.
+
+    Arguments:
+        target: The target text.
+
+    Returns:
+        The Automated Readability score.
+    """
+    if not target.strip():  # Check if the target text is blank
+        return None
+    target = _repeat_text(target)
+    return textstat.automated_readability_index(target)
+
+
+@metric(use_reference=False, help="The Coleman-Liau Index")
+def coleman_liau(target: str, **kwargs) -> float:
+    """Calculate the Coleman-Liau Index using textstat.
+
+    Arguments:
+        target: The target text.
+
+    Returns:
+        The Coleman-Liau Index.
+    """
+    if not target.strip():  # Check if the target text is blank
+        return None
+    target = _repeat_text(target)
+    return textstat.coleman_liau_index(target)
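The rewrite above drops py-readability-metrics (and its NLTK download step) in favor of textstat, keeping the existing `_repeat_text` padding for short inputs. As a rough standalone check of the new backend, the same textstat calls can be exercised directly; exact values depend on the installed textstat version, so the scores printed below are not asserted:

```python
from textstat import textstat

sample = "This is a sample text for testing readability metrics."

# The same textstat functions the rewritten janus.metrics.reading module calls.
print("flesch      ", textstat.flesch_reading_ease(sample))
print("flesch grade", textstat.flesch_kincaid_grade(sample))
print("gunning fog ", textstat.gunning_fog(sample))
print("dale-chall  ", textstat.dale_chall_readability_score_v2(sample))
print("ari         ", textstat.automated_readability_index(sample))
print("coleman-liau", textstat.coleman_liau_index(sample))
```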
janus/metrics/rouge_score.py
CHANGED
@@ -1,7 +1,6 @@
 import click
-import nltk
 import typer
-from
+from rouge_score import rouge_scorer
 from typing_extensions import Annotated
 
 from janus.metrics.metric import metric
@@ -18,9 +17,9 @@ def rouge(
             "-g",
             help=(
                 "The granularity of the ROUGE score. `n` refers to "
-                "ROUGE-N, `l` refers to ROUGE-L
+                "ROUGE-N, `l` refers to ROUGE-L."
             ),
-            click_type=click.Choice(["n", "l"
+            click_type=click.Choice(["n", "l"]),
         ),
     ] = "n",
     n_gram: Annotated[
@@ -52,7 +51,7 @@ def rouge(
         target: The target text.
         reference: The reference text.
         granularity: The granularity of the ROUGE score. `n` refers to ROUGE-N, `l`
-            refers to ROUGE-L
+            refers to ROUGE-L.
         n_gram: The n-gram overlap calculated for ROUGE-N. Can be an integer.
         score_type: Whether to use the F-score, precision, or recall. For example, `f`
            refers to the F-score, `p` refers to precision, and `r` refers to recall.
@@ -60,37 +59,25 @@ def rouge(
     Returns:
         The ROUGE score.
     """
-    nltk.download("punkt", quiet=True)
-
     if granularity.lower() == "n":
-        metric_name = "rouge
-        metric_name_output = f"rouge-{n_gram}"
-        max_n = n_gram
+        metric_name = f"rouge{n_gram}"
     elif granularity.lower() == "l":
-        metric_name = "
-        metric_name_output = "rouge-l"
-        max_n = 4
-    elif granularity.lower() == "w":
-        metric_name = "rouge-w"
-        metric_name_output = "rouge-w"
-        max_n = 4
+        metric_name = "rougeL"
     else:
-        raise ValueError("Invalid granularity. Must be one of `n
-
-    if score_type.lower() not in ["f", "p", "r"]:
-        raise ValueError("Invalid score type. Must be one of `f`, `p`, or `r`.")
+        raise ValueError("Invalid granularity. Must be one of `n` or `l`")
 
-    evaluator =
-
-
-        limit_length=False,
-        length_limit=1_000,
-        length_limit_type="words",
-        apply_avg=False,
-        apply_best=False,
-        alpha=0.5,  # Default F1_score
-        weight_factor=1.2,
-        stemming=True,
+    evaluator = rouge_scorer.RougeScorer(
+        [metric_name],
+        use_stemmer=True,
     )
-    scores = evaluator.
-
+    scores = evaluator.score(target, reference)
+    scores_fpr = scores[metric_name]
+    if score_type.lower() == "f":
+        score = scores_fpr.fmeasure
+    elif score_type.lower() == "p":
+        score = scores_fpr.precision
+    elif score_type.lower() == "r":
+        score = scores_fpr.recall
+    else:
+        raise ValueError("Invalid score type. Must be one of `f`, `p`, or `r`.")
+    return score
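With py-rouge gone, the metric reduces to a single `RougeScorer` call from Google's rouge-score package; granularity `w` is no longer supported. A minimal standalone sketch of the same pattern (the target and reference strings are placeholders):

```python
from rouge_score import rouge_scorer

target = "the quick brown fox jumps over the lazy dog"
reference = "the quick brown dog jumps over the log"

# ROUGE-2 with stemming, mirroring the updated janus.metrics.rouge_score.rouge()
scorer = rouge_scorer.RougeScorer(["rouge2"], use_stemmer=True)
result = scorer.score(target, reference)["rouge2"]
print(result.precision, result.recall, result.fmeasure)
```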
janus/parsers/_tests/test_code_parser.py
CHANGED
@@ -25,7 +25,7 @@ class TestCodeParser(unittest.TestCase):
     def test_get_format_instructions(self):
         self.assertEqual(
             self.parser.get_format_instructions(),
-            "Output must contain text contained within triple
+            "Output must contain text contained within triple backticks (```)",
         )
 
 
janus/parsers/code_parser.py
CHANGED
@@ -19,9 +19,9 @@ class CodeParser(JanusParser):
         if code is None:
             raise JanusParserException(
                 text,
-                "Code not find code between triple
+                "Code not find code between triple backticks",
             )
         return str(code.group(1))
 
     def get_format_instructions(self) -> str:
-        return "Output must contain text contained within triple
+        return "Output must contain text contained within triple backticks (```)"
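For context on the corrected messages above, `CodeParser` simply extracts the first block wrapped in triple backticks from the model output and raises when none is found. A rough illustration of that behavior, assuming a regex equivalent to the one the parser uses (the pattern below is illustrative, not the package's exact one):

```python
import re


def extract_fenced_code(text: str) -> str:
    # Grab the first block wrapped in triple backticks, skipping an
    # optional language tag after the opening fence.
    match = re.search(r"```[^\n]*\n(.*?)```", text, re.DOTALL)
    if match is None:
        raise ValueError("Could not find code between triple backticks")
    return match.group(1)


print(extract_fenced_code("Here you go:\n```python\nprint('hi')\n```"))
```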
janus/parsers/eval_parsers/incose_parser.py
CHANGED
@@ -70,7 +70,6 @@ class IncoseParser(JanusParser, PydanticOutputParser):
 
         obj = json.loads(text)
 
-        # For some reason requirements objects are in a double list?
         reqs = obj["requirements"]
 
         # Generate a unique ID for each requirement (ensure they are unique)
@@ -91,10 +90,11 @@ class IncoseParser(JanusParser, PydanticOutputParser):
 
         # Strip everything outside the JSON object
         begin, end = text.find("["), text.rfind("]")
-
+        end += 1 if end != -1 else 0
+        text = text[begin:end]
 
         try:
-            out: RequirementList = super().parse(text)
+            out: RequirementList = super(IncoseParser, self).parse(text)
         except json.JSONDecodeError as e:
             log.debug(f"Invalid JSON object. Output:\n{text}")
             raise OutputParserException(f"Got invalid JSON object. Error: {e}")
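The two added lines in the second hunk fix an off-by-one in the bracket stripping: `str.rfind` returns the index of the closing `]` itself, and Python slicing excludes the end index, so without the `+ 1` the trailing bracket was cut off and the JSON no longer parsed. A small illustration:

```python
text = 'Sure, here is the evaluation:\n[{"requirement_id": "abc12345"}]\nHope that helps.'

begin, end = text.find("["), text.rfind("]")
print(text[begin:end])        # [{"requirement_id": "abc12345"}  <- closing bracket lost

end += 1 if end != -1 else 0  # the fix applied in this release
print(text[begin:end])        # [{"requirement_id": "abc12345"}] <- valid JSON again
```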
janus/prompts/templates/cyclic/human.txt
ADDED
@@ -0,0 +1,16 @@
+You are tasked with generating code in the {TARGET_LANGUAGE} language given a list of requirements.
+
+
+1. Read all requirements.
+2. Write code that addresses all requirments, ensuring that all mentioned conditions are met.
+3. Adhere to the coding conventions and best practices of the {TARGET_LANGUAGE} language.
+4. Ensure the code is correct, well-structured, and includes comments for readability.
+5. The code you provide should be succienct, concise, and runable.
+
+Here are the requirements for the code:
+
+```
+{SOURCE_CODE}
+```
+
+Don't forget to include your final code between triple backticks!
janus/prompts/templates/cyclic/system.txt
ADDED
@@ -0,0 +1 @@
+Your task is to generate code based on the provided requirements. The code should be written in the {TARGET_LANGUAGE} language. Make sure the code adheres to best practices, is efficient, and well-documented.
janus/prompts/templates/eval_prompts/incose/human.txt
CHANGED
@@ -17,7 +17,7 @@ C9 - Conforming: Individual needs and requirements should conform to an approved
 
 For each and every requirement below, you must indicate whether they "pass" or "fail" each of the above criteria. Briefly explain your reasoning before providing each pass/fail.
 
-Your response should be formatted as a list of JSON objects, with each object corresponding to one requirement. Each object should include 10 keys: `requirement_id`, `C1`, `C2`, ..., `C9`. `requirement_id` should have a string value that holds the 8-character UUID associated with the requirement. The other four values should each be a JSON object with two keys: `reasoning` (a clear explanation of why the criterion is passed or failed) and a `score` (the literal string "pass" or "fail").
+Your response should be formatted as a list of JSON objects, with each object corresponding to one requirement. Each object should include 10 keys: `requirement_id`, `C1`, `C2`, ..., `C9`. `requirement_id` should have a string value that holds the 8-character UUID associated with the requirement. The other four values should each be a JSON object with two keys: `reasoning` (a clear explanation of why the criterion is passed or failed) and a `score` (the literal string "pass" or "fail"). You should also include the requirement itself as a string value for the key `requirement`.
 
 Be discerning in your evaluation; only very high-quality requirements should pass all criteria. Be a hard grader. If a requirement fails a criterion, be thorough and detailed in your explanation of why.
 
janus/prompts/templates/extract_variables/system.txt
ADDED
@@ -0,0 +1 @@
+You are a senior software engineer named John and tasked with creating intermediate products of {SOURCE_LANGUAGE} code.
{janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: janus-llm
-Version: 4.4.5
+Version: 4.5.4
 Summary: A transcoding library using LLMs.
 License: Apache 2.0
 Author: Michael Doyle
@@ -24,22 +24,21 @@ Requires-Dist: langchain-community (>=0.2.0,<0.3.0)
 Requires-Dist: langchain-core (>=0.2.0,<0.3.0)
 Requires-Dist: langchain-openai (>=0.1.8,<0.2.0)
 Requires-Dist: langchain-unstructured (>=0.1.2,<0.2.0)
-Requires-Dist: nltk (>=3.8.1,<4.0.0)
 Requires-Dist: numpy (>=1.24.3,<2.0.0)
 Requires-Dist: openai (>=1.14.0,<2.0.0)
 Requires-Dist: pi-heif (>=0.20.0,<0.21.0)
-Requires-Dist: py-readability-metrics (>=1.4.5,<2.0.0)
-Requires-Dist: py-rouge (>=1.1,<2.0)
 Requires-Dist: pybind11 (>=2.13.6,<3.0.0)
 Requires-Dist: pytesseract (>=0.3.13,<0.4.0)
 Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
+Requires-Dist: rouge-score (>=0.1.2,<0.2.0)
 Requires-Dist: sacrebleu (>=2.4.1,<3.0.0)
 Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
 Requires-Dist: sentence-transformers (>=2.6.1,<3.0.0) ; extra == "hf-local" or extra == "all"
 Requires-Dist: setuptools (>=75.6.0,<76.0.0)
 Requires-Dist: tesseract (>=0.1.3,<0.2.0)
 Requires-Dist: text-generation (>=0.6.0,<0.7.0)
+Requires-Dist: textstat (>=0.7.5,<0.8.0)
 Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
 Requires-Dist: transformers (>=4.31.0,<5.0.0)
 Requires-Dist: tree-sitter (>=0.21.0,<0.22.0)