janus-llm 3.1.1__tar.gz → 3.2.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. {janus_llm-3.1.1 → janus_llm-3.2.1}/PKG-INFO +1 -1
  2. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/__init__.py +3 -3
  3. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/_tests/test_cli.py +3 -3
  4. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/cli.py +65 -8
  5. janus_llm-3.2.1/janus/converter/__init__.py +6 -0
  6. janus_llm-3.2.1/janus/converter/_tests/test_translate.py +155 -0
  7. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/converter/converter.py +6 -3
  8. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/converter/translate.py +1 -1
  9. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/_tests/test_collections.py +2 -2
  10. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/_tests/test_database.py +1 -1
  11. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/_tests/test_vectorize.py +3 -3
  12. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/collections.py +2 -2
  13. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/database.py +1 -1
  14. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/embedding_models_info.py +1 -1
  15. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/vectorize.py +5 -5
  16. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/_tests/test_combine.py +1 -1
  17. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/_tests/test_splitter.py +1 -1
  18. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/alc/_tests/test_alc.py +6 -6
  19. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/alc/alc.py +5 -5
  20. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/binary/_tests/test_binary.py +4 -4
  21. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/binary/binary.py +5 -5
  22. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/block.py +2 -2
  23. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/combine.py +3 -3
  24. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/file.py +2 -2
  25. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/mumps/_tests/test_mumps.py +5 -5
  26. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/mumps/mumps.py +5 -5
  27. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/mumps/patterns.py +1 -1
  28. janus_llm-3.2.1/janus/language/naive/__init__.py +4 -0
  29. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/naive/basic_splitter.py +4 -4
  30. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/naive/chunk_splitter.py +4 -4
  31. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/naive/registry.py +1 -1
  32. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/naive/simple_ast.py +5 -5
  33. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/naive/tag_splitter.py +4 -4
  34. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/node.py +1 -1
  35. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/splitter.py +4 -4
  36. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/treesitter/_tests/test_treesitter.py +5 -5
  37. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/treesitter/treesitter.py +4 -4
  38. janus_llm-3.2.1/janus/llm/__init__.py +1 -0
  39. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/llm/model_callbacks.py +1 -1
  40. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/llm/models_info.py +45 -23
  41. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/test_bleu.py +1 -1
  42. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/test_chrf.py +1 -1
  43. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/test_file_pairing.py +1 -1
  44. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/test_llm.py +5 -5
  45. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/test_reading.py +1 -1
  46. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/test_rouge_score.py +1 -1
  47. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/test_similarity_score.py +1 -1
  48. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/test_treesitter_metrics.py +2 -2
  49. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/bleu.py +1 -1
  50. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/chrf.py +1 -1
  51. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/complexity_metrics.py +4 -4
  52. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/file_pairing.py +5 -5
  53. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/llm_metrics.py +1 -1
  54. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/metric.py +11 -11
  55. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/reading.py +1 -1
  56. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/rouge_score.py +1 -1
  57. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/similarity.py +2 -2
  58. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/parsers/_tests/test_code_parser.py +1 -1
  59. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/parsers/code_parser.py +2 -2
  60. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/parsers/doc_parser.py +3 -3
  61. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/parsers/eval_parser.py +2 -2
  62. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/parsers/reqs_parser.py +3 -3
  63. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/parsers/uml.py +1 -2
  64. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/prompts/prompt.py +2 -2
  65. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/utils/_tests/test_logger.py +1 -1
  66. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/utils/_tests/test_progress.py +1 -1
  67. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/utils/progress.py +1 -1
  68. {janus_llm-3.1.1 → janus_llm-3.2.1}/pyproject.toml +1 -1
  69. janus_llm-3.1.1/janus/converter/__init__.py +0 -6
  70. janus_llm-3.1.1/janus/converter/_tests/test_translate.py +0 -383
  71. janus_llm-3.1.1/janus/language/naive/__init__.py +0 -4
  72. janus_llm-3.1.1/janus/llm/__init__.py +0 -1
  73. {janus_llm-3.1.1 → janus_llm-3.2.1}/LICENSE +0 -0
  74. {janus_llm-3.1.1 → janus_llm-3.2.1}/README.md +0 -0
  75. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/__main__.py +0 -0
  76. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/_tests/__init__.py +0 -0
  77. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/_tests/conftest.py +0 -0
  78. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/converter/_tests/__init__.py +0 -0
  79. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/converter/diagram.py +0 -0
  80. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/converter/document.py +0 -0
  81. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/converter/evaluate.py +0 -0
  82. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/converter/requirements.py +0 -0
  83. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/__init__.py +0 -0
  84. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/embedding/_tests/__init__.py +0 -0
  85. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/__init__.py +0 -0
  86. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/_tests/__init__.py +0 -0
  87. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/alc/__init__.py +0 -0
  88. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/alc/_tests/__init__.py +0 -0
  89. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/binary/__init__.py +0 -0
  90. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/binary/_tests/__init__.py +0 -0
  91. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/binary/reveng/decompile_script.py +0 -0
  92. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/mumps/__init__.py +0 -0
  93. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/mumps/_tests/__init__.py +0 -0
  94. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/treesitter/__init__.py +0 -0
  95. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/language/treesitter/_tests/__init__.py +0 -0
  96. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/__init__.py +0 -0
  97. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/__init__.py +0 -0
  98. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/reference.py +0 -0
  99. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/_tests/target.py +0 -0
  100. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/cli.py +0 -0
  101. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/metrics/splitting.py +0 -0
  102. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/parsers/__init__.py +0 -0
  103. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/parsers/_tests/__init__.py +0 -0
  104. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/prompts/__init__.py +0 -0
  105. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/utils/__init__.py +0 -0
  106. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/utils/_tests/__init__.py +0 -0
  107. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/utils/enums.py +0 -0
  108. {janus_llm-3.1.1 → janus_llm-3.2.1}/janus/utils/logger.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: janus-llm
3
- Version: 3.1.1
3
+ Version: 3.2.1
4
4
  Summary: A transcoding library using LLMs.
5
5
  Home-page: https://github.com/janus-llm/janus-llm
6
6
  License: Apache 2.0
@@ -2,10 +2,10 @@ import warnings
2
2
 
3
3
  from langchain_core._api.deprecation import LangChainDeprecationWarning
4
4
 
5
- from .converter.translate import Translator
6
- from .metrics import * # noqa: F403
5
+ from janus.converter.translate import Translator
6
+ from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "3.1.1"
8
+ __version__ = "3.2.1"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
@@ -4,9 +4,9 @@ from unittest.mock import ANY, patch
4
4
 
5
5
  from typer.testing import CliRunner
6
6
 
7
- from ..cli import app, translate
8
- from ..embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
9
- from ..llm.models_info import MODEL_CONFIG_DIR
7
+ from janus.cli import app, translate
8
+ from janus.embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
9
+ from janus.llm.models_info import MODEL_CONFIG_DIR
10
10
 
11
11
 
12
12
  class TestCli(unittest.TestCase):
@@ -32,8 +32,12 @@ from janus.language.treesitter import TreeSitterSplitter
32
32
  from janus.llm.model_callbacks import COST_PER_1K_TOKENS
33
33
  from janus.llm.models_info import (
34
34
  MODEL_CONFIG_DIR,
35
+ MODEL_ID_TO_LONG_ID,
35
36
  MODEL_TYPE_CONSTRUCTORS,
37
+ MODEL_TYPES,
36
38
  TOKEN_LIMITS,
39
+ bedrock_models,
40
+ openai_models,
37
41
  )
38
42
  from janus.metrics.cli import evaluate
39
43
  from janus.utils.enums import LANGUAGES
@@ -104,7 +108,7 @@ embedding = typer.Typer(
104
108
 
105
109
  def version_callback(value: bool) -> None:
106
110
  if value:
107
- from . import __version__ as version
111
+ from janus import __version__ as version
108
112
 
109
113
  print(f"Janus CLI [blue]v{version}[/blue]")
110
114
  raise typer.Exit()
@@ -179,7 +183,7 @@ def translate(
179
183
  "-L",
180
184
  help="The custom name of the model set with 'janus llm add'.",
181
185
  ),
182
- ] = "gpt-3.5-turbo-0125",
186
+ ] = "gpt-4o",
183
187
  max_prompts: Annotated[
184
188
  int,
185
189
  typer.Option(
@@ -301,7 +305,7 @@ def document(
301
305
  "-L",
302
306
  help="The custom name of the model set with 'janus llm add'.",
303
307
  ),
304
- ] = "gpt-3.5-turbo-0125",
308
+ ] = "gpt-4o",
305
309
  max_prompts: Annotated[
306
310
  int,
307
311
  typer.Option(
@@ -437,7 +441,7 @@ def diagram(
437
441
  "-L",
438
442
  help="The custom name of the model set with 'janus llm add'.",
439
443
  ),
440
- ] = "gpt-3.5-turbo-0125",
444
+ ] = "gpt-4o",
441
445
  max_prompts: Annotated[
442
446
  int,
443
447
  typer.Option(
@@ -800,16 +804,44 @@ def llm_add(
800
804
  "model_cost": {"input": in_cost, "output": out_cost},
801
805
  }
802
806
  elif model_type == "OpenAI":
803
- model_name = typer.prompt("Enter the model name", default="gpt-3.5-turbo-0125")
807
+ model_id = typer.prompt(
808
+ "Enter the model ID (list model IDs with `janus llm ls -a`)",
809
+ default="gpt-4o",
810
+ type=click.Choice(openai_models),
811
+ show_choices=False,
812
+ )
804
813
  params = dict(
805
- model_name=model_name,
814
+ # OpenAI uses the "model_name" key for what we're calling "long_model_id"
815
+ model_name=MODEL_ID_TO_LONG_ID[model_id],
806
816
  temperature=0.7,
807
817
  n=1,
808
818
  )
809
- max_tokens = TOKEN_LIMITS[model_name]
810
- model_cost = COST_PER_1K_TOKENS[model_name]
819
+ max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
820
+ model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
821
+ cfg = {
822
+ "model_type": model_type,
823
+ "model_id": model_id,
824
+ "model_args": params,
825
+ "token_limit": max_tokens,
826
+ "model_cost": model_cost,
827
+ }
828
+ elif model_type == "BedrockChat" or model_type == "Bedrock":
829
+ model_id = typer.prompt(
830
+ "Enter the model ID (list model IDs with `janus llm ls -a`)",
831
+ default="bedrock-claude-sonnet",
832
+ type=click.Choice(bedrock_models),
833
+ show_choices=False,
834
+ )
835
+ params = dict(
836
+ # Bedrock uses the "model_id" key for what we're calling "long_model_id"
837
+ model_id=MODEL_ID_TO_LONG_ID[model_id],
838
+ model_kwargs={"temperature": 0.7},
839
+ )
840
+ max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
841
+ model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
811
842
  cfg = {
812
843
  "model_type": model_type,
844
+ "model_id": model_id,
813
845
  "model_args": params,
814
846
  "token_limit": max_tokens,
815
847
  "model_cost": model_cost,
@@ -821,6 +853,31 @@ def llm_add(
821
853
  print(f"Model config written to {model_cfg}")
822
854
 
823
855
 
856
+ @llm.command("ls", help="List all of the user-configured models")
857
+ def llm_ls(
858
+ all: Annotated[
859
+ bool,
860
+ typer.Option(
861
+ "--all",
862
+ "-a",
863
+ is_flag=True,
864
+ help="List all models, including the default model IDs.",
865
+ click_type=click.Choice(sorted(list(MODEL_TYPE_CONSTRUCTORS.keys()))),
866
+ ),
867
+ ] = False,
868
+ ):
869
+ print("\n[green]User-configured models[/green]:")
870
+ for model_cfg in MODEL_CONFIG_DIR.glob("*.json"):
871
+ with open(model_cfg, "r") as f:
872
+ cfg = json.load(f)
873
+ print(f"\t[blue]{model_cfg.stem}[/blue]: [purple]{cfg['model_type']}[/purple]")
874
+
875
+ if all:
876
+ print("\n[green]Available model IDs[/green]:")
877
+ for model_id, model_type in MODEL_TYPES.items():
878
+ print(f"\t[blue]{model_id}[/blue]: [purple]{model_type}[/purple]")
879
+
880
+
824
881
  @embedding.command("add", help="Add an embedding model config to janus")
825
882
  def embedding_add(
826
883
  model_name: Annotated[
@@ -0,0 +1,6 @@
1
+ from janus.converter.converter import Converter
2
+ from janus.converter.diagram import DiagramGenerator
3
+ from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
4
+ from janus.converter.evaluate import Evaluator
5
+ from janus.converter.requirements import RequirementsDocumenter
6
+ from janus.converter.translate import Translator
@@ -0,0 +1,155 @@
1
+ import unittest
2
+ from pathlib import Path
3
+ from typing import Any, Iterable, List, Optional, Type
4
+
5
+ import pytest
6
+ from langchain.schema import Document
7
+ from langchain.schema.embeddings import Embeddings
8
+ from langchain.schema.vectorstore import VST, VectorStore
9
+
10
+ from janus.converter.diagram import DiagramGenerator
11
+ from janus.converter.requirements import RequirementsDocumenter
12
+ from janus.converter.translate import Translator
13
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
14
+
15
+
16
+ class MockCollection(VectorStore):
17
+ """Vector store for testing"""
18
+
19
+ def __init__(self):
20
+ self._add_texts_calls = 0
21
+
22
+ def add_texts(
23
+ self, texts: Iterable[str], metadatas: Optional[List[dict]] = None, **kwargs: Any
24
+ ) -> List[str]:
25
+ self._add_texts_calls += 1
26
+ return ["id"]
27
+
28
+ def similarity_search(self, query: str, k: int = 4, **kwargs: Any) -> List[Document]:
29
+ raise NotImplementedError("similarity_search() not implemented!")
30
+
31
+ @classmethod
32
+ def from_texts(
33
+ cls: Type[VST],
34
+ texts: List[str],
35
+ embedding: Embeddings,
36
+ metadatas: Optional[List[dict]] = None,
37
+ **kwargs: Any,
38
+ ) -> VST:
39
+ raise NotImplementedError("from_texts() not implemented!")
40
+
41
+
42
+ class TestTranslator(unittest.TestCase):
43
+ """Tests for the Translator class."""
44
+
45
+ def setUp(self):
46
+ """Set up the tests."""
47
+ self.translator = Translator(
48
+ model="gpt-4o",
49
+ source_language="fortran",
50
+ target_language="python",
51
+ target_version="3.10",
52
+ )
53
+ self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
54
+ self.TEST_FILE_EMBEDDING_COUNT = 14
55
+
56
+ self.req_translator = RequirementsDocumenter(
57
+ model="gpt-4o",
58
+ source_language="fortran",
59
+ prompt_template="requirements",
60
+ )
61
+
62
+ @pytest.mark.translate
63
+ def test_translate(self):
64
+ """Test translate method."""
65
+ # Delete a file if it's already there
66
+ python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.py"
67
+ python_file.unlink(missing_ok=True)
68
+ python_file.parent.rmdir() if python_file.parent.is_dir() else None
69
+ self.translator.translate(self.test_file.parent, self.test_file.parent / "python")
70
+ # Only check the top-most level functionality, since it should be handled by other
71
+ # unit tests anyway
72
+ self.assertTrue(python_file.exists())
73
+
74
+ def test_invalid_selections(self) -> None:
75
+ """Tests that settings values for the translator will raise exceptions"""
76
+ self.assertRaises(
77
+ ValueError, self.translator.set_target_language, "gobbledy", "goobledy"
78
+ )
79
+ self.assertRaises(
80
+ ValueError, self.translator.set_source_language, "scribbledy-doop"
81
+ )
82
+ self.translator.set_prompt("pish posh")
83
+ self.assertRaises(ValueError, self.translator._load_parameters)
84
+
85
+
86
+ class TestDiagramGenerator(unittest.TestCase):
87
+ """Tests for the DiagramGenerator class."""
88
+
89
+ def setUp(self):
90
+ """Set up the tests."""
91
+ self.diagram_generator = DiagramGenerator(
92
+ model="gpt-4o",
93
+ source_language="fortran",
94
+ diagram_type="Activity",
95
+ )
96
+
97
+ def test_init(self):
98
+ """Test __init__ method."""
99
+ self.assertEqual(self.diagram_generator._model_name, "gpt-4o")
100
+ self.assertEqual(self.diagram_generator._source_language, "fortran")
101
+ self.assertEqual(self.diagram_generator._diagram_type, "Activity")
102
+
103
+ def test_add_translation(self):
104
+ """Test _add_translation method."""
105
+ block = TranslatedCodeBlock(
106
+ original=CodeBlock(
107
+ id="test",
108
+ name="Test Block",
109
+ node_type="function",
110
+ language="python",
111
+ text="print('Hello, World!')",
112
+ start_point=(0, 0),
113
+ end_point=(1, 0),
114
+ start_byte=0,
115
+ end_byte=1,
116
+ tokens=5,
117
+ children=[],
118
+ ),
119
+ language="python",
120
+ )
121
+ self.diagram_generator._add_translation(block)
122
+ self.assertTrue(block.translated)
123
+ self.assertIsNotNone(block.text)
124
+ self.assertIsNotNone(block.tokens)
125
+
126
+
127
+ @pytest.mark.parametrize(
128
+ "source_language,prompt_template,expected_target_language,expected_target_version,",
129
+ [
130
+ ("python", "document_inline", "python", "3.10"),
131
+ ("fortran", "document", "text", None),
132
+ ("mumps", "requirements", "text", None),
133
+ ("python", "simple", "javascript", "es6"),
134
+ ],
135
+ )
136
+ def test_language_combinations(
137
+ source_language: str,
138
+ prompt_template: str,
139
+ expected_target_language: str,
140
+ expected_target_version: str,
141
+ ):
142
+ """Tests that translator target language settings are consistent
143
+ with prompt template expectations.
144
+ """
145
+ translator = Translator(model="gpt-4o")
146
+ translator.set_model("gpt-4o")
147
+ translator.set_source_language(source_language)
148
+ translator.set_target_language(expected_target_language, expected_target_version)
149
+ translator.set_prompt(prompt_template)
150
+ translator._load_parameters()
151
+ assert translator._target_language == expected_target_language # nosec
152
+ assert translator._target_version == expected_target_version # nosec
153
+ assert translator._splitter.language == source_language # nosec
154
+ assert translator._splitter.model.model_name == "gpt-4o" # nosec
155
+ assert translator._prompt_template_name == prompt_template # nosec
@@ -64,7 +64,7 @@ class Converter:
64
64
 
65
65
  def __init__(
66
66
  self,
67
- model: str = "gpt-3.5-turbo-0125",
67
+ model: str = "gpt-4o",
68
68
  model_arguments: dict[str, Any] = {},
69
69
  source_language: str = "fortran",
70
70
  max_prompts: int = 10,
@@ -92,6 +92,7 @@ class Converter:
92
92
  self.override_token_limit: bool = max_tokens is not None
93
93
 
94
94
  self._model_name: str
95
+ self._model_id: str
95
96
  self._custom_model_arguments: dict[str, Any]
96
97
 
97
98
  self._source_language: str
@@ -265,7 +266,9 @@ class Converter:
265
266
  # model_arguments.update(self._custom_model_arguments)
266
267
 
267
268
  # Load the model
268
- self._llm, token_limit, self.model_cost = load_model(self._model_name)
269
+ self._llm, self._model_id, token_limit, self.model_cost = load_model(
270
+ self._model_name
271
+ )
269
272
  # Set the max_tokens to less than half the model's limit to allow for enough
270
273
  # tokens at output
271
274
  # Only modify max_tokens if it is not specified by user
@@ -283,7 +286,7 @@ class Converter:
283
286
  If the relevant fields have not been changed since the last time this
284
287
  method was called, nothing happens.
285
288
  """
286
- prompt_engine = MODEL_PROMPT_ENGINES[self._model_name](
289
+ prompt_engine = MODEL_PROMPT_ENGINES[self._model_id](
287
290
  source_language=self._source_language,
288
291
  prompt_template=self._prompt_template_name,
289
292
  )
@@ -90,7 +90,7 @@ class Translator(Converter):
90
90
  f"({self._source_language} != {self._target_language})"
91
91
  )
92
92
 
93
- prompt_engine = MODEL_PROMPT_ENGINES[self._model_name](
93
+ prompt_engine = MODEL_PROMPT_ENGINES[self._model_id](
94
94
  source_language=self._source_language,
95
95
  target_language=self._target_language,
96
96
  target_version=self._target_version,
@@ -4,8 +4,8 @@ from unittest.mock import MagicMock
4
4
 
5
5
  import pytest
6
6
 
7
- from ...utils.enums import EmbeddingType
8
- from ..collections import Collections
7
+ from janus.embedding.collections import Collections
8
+ from janus.utils.enums import EmbeddingType
9
9
 
10
10
 
11
11
  class TestCollections(unittest.TestCase):
@@ -2,7 +2,7 @@ import unittest
2
2
  from pathlib import Path
3
3
  from unittest.mock import patch
4
4
 
5
- from ..database import ChromaEmbeddingDatabase, uri_to_path
5
+ from janus.embedding.database import ChromaEmbeddingDatabase, uri_to_path
6
6
 
7
7
 
8
8
  class TestDatabase(unittest.TestCase):
@@ -5,9 +5,9 @@ from unittest.mock import MagicMock
5
5
 
6
6
  from chromadb.api.client import Client
7
7
 
8
- from ...language.treesitter import TreeSitterSplitter
9
- from ...utils.enums import EmbeddingType
10
- from ..vectorize import Vectorizer, VectorizerFactory
8
+ from janus.embedding.vectorize import Vectorizer, VectorizerFactory
9
+ from janus.language.treesitter import TreeSitterSplitter
10
+ from janus.utils.enums import EmbeddingType
11
11
 
12
12
 
13
13
  class MockDBVectorizer(VectorizerFactory):
@@ -5,8 +5,8 @@ from typing import Dict, Optional, Sequence
5
5
  from chromadb import Client, Collection
6
6
  from langchain_community.vectorstores import Chroma
7
7
 
8
- from ..utils.enums import EmbeddingType
9
- from .embedding_models_info import load_embedding_model
8
+ from janus.embedding.embedding_models_info import load_embedding_model
9
+ from janus.utils.enums import EmbeddingType
10
10
 
11
11
  # See https://docs.trychroma.com/telemetry#in-chromas-backend-using-environment-variables
12
12
  os.environ["ANONYMIZED_TELEMETRY"] = "False"
@@ -5,7 +5,7 @@ from urllib.request import url2pathname
5
5
 
6
6
  import chromadb
7
7
 
8
- from ..utils.logger import create_logger
8
+ from janus.utils.logger import create_logger
9
9
 
10
10
  log = create_logger(__name__)
11
11
 
@@ -8,7 +8,7 @@ from langchain_community.embeddings.huggingface import HuggingFaceInferenceAPIEm
8
8
  from langchain_core.embeddings import Embeddings
9
9
  from langchain_openai import OpenAIEmbeddings
10
10
 
11
- from ..utils.logger import create_logger
11
+ from janus.utils.logger import create_logger
12
12
 
13
13
  load_dotenv()
14
14
 
@@ -6,10 +6,10 @@ from typing import Any, Dict, Optional, Sequence
6
6
  from chromadb import Client, Collection
7
7
  from langchain_community.vectorstores import Chroma
8
8
 
9
- from ..language.block import CodeBlock, TranslatedCodeBlock
10
- from ..utils.enums import EmbeddingType
11
- from .collections import Collections
12
- from .database import ChromaEmbeddingDatabase
9
+ from janus.embedding.collections import Collections
10
+ from janus.embedding.database import ChromaEmbeddingDatabase
11
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
12
+ from janus.utils.enums import EmbeddingType
13
13
 
14
14
 
15
15
  class Vectorizer(object):
@@ -59,7 +59,7 @@ class Vectorizer(object):
59
59
  self,
60
60
  code_block: CodeBlock,
61
61
  collection_name: EmbeddingType | str,
62
- filename: str # perhaps this should be a relative path from the source, but for
62
+ filename: str, # perhaps this should be a relative path from the source, but for
63
63
  # now we're all in 1 directory
64
64
  ) -> None:
65
65
  """Calculate `code_block` embedding, returning success & storing in `embedding_id`
@@ -1,6 +1,6 @@
1
1
  import unittest
2
2
 
3
- from ..combine import CodeBlock, Combiner, TranslatedCodeBlock
3
+ from janus.language.combine import CodeBlock, Combiner, TranslatedCodeBlock
4
4
 
5
5
 
6
6
  class TestCombiner(unittest.TestCase):
@@ -1,6 +1,6 @@
1
1
  import unittest
2
2
 
3
- from ..splitter import Splitter
3
+ from janus.language.splitter import Splitter
4
4
 
5
5
 
6
6
  class TestSplitter(unittest.TestCase):
@@ -1,9 +1,9 @@
1
1
  import unittest
2
2
  from pathlib import Path
3
3
 
4
- from ....llm import load_model
5
- from ...combine import Combiner
6
- from ..alc import AlcSplitter
4
+ from janus.language.alc import AlcSplitter
5
+ from janus.language.combine import Combiner
6
+ from janus.llm import load_model
7
7
 
8
8
 
9
9
  class TestAlcSplitter(unittest.TestCase):
@@ -11,8 +11,8 @@ class TestAlcSplitter(unittest.TestCase):
11
11
 
12
12
  def setUp(self):
13
13
  """Set up the tests."""
14
- model_name = "gpt-3.5-turbo-0125"
15
- llm, _, _ = load_model(model_name)
14
+ model_name = "gpt-4o"
15
+ llm, _, _, _ = load_model(model_name)
16
16
  self.splitter = AlcSplitter(model=llm)
17
17
  self.combiner = Combiner(language="ibmhlasm")
18
18
  self.test_file = Path("janus/language/alc/_tests/alc.asm")
@@ -20,7 +20,7 @@ class TestAlcSplitter(unittest.TestCase):
20
20
  def test_split(self):
21
21
  """Test the split method."""
22
22
  tree_root = self.splitter.split(self.test_file)
23
- self.assertEqual(tree_root.n_descendents, 34)
23
+ self.assertAlmostEqual(tree_root.n_descendents, 32, delta=5)
24
24
  self.assertLessEqual(tree_root.max_tokens, self.splitter.max_tokens)
25
25
  self.assertFalse(tree_root.complete)
26
26
  self.combiner.combine_children(tree_root)
@@ -1,10 +1,10 @@
1
1
  from langchain.schema.language_model import BaseLanguageModel
2
2
 
3
- from ...utils.logger import create_logger
4
- from ..block import CodeBlock
5
- from ..combine import Combiner
6
- from ..node import NodeType
7
- from ..treesitter import TreeSitterSplitter
3
+ from janus.language.block import CodeBlock
4
+ from janus.language.combine import Combiner
5
+ from janus.language.node import NodeType
6
+ from janus.language.treesitter import TreeSitterSplitter
7
+ from janus.utils.logger import create_logger
8
8
 
9
9
  log = create_logger(__name__)
10
10
 
@@ -5,17 +5,17 @@ from unittest.mock import patch
5
5
 
6
6
  import pytest
7
7
 
8
- from ....llm import load_model
9
- from ..binary import BinarySplitter
8
+ from janus.language.binary import BinarySplitter
9
+ from janus.llm import load_model
10
10
 
11
11
 
12
12
  class TestBinarySplitter(unittest.TestCase):
13
13
  """Tests for the BinarySplitter class."""
14
14
 
15
15
  def setUp(self):
16
- model_name = "gpt-3.5-turbo-0125"
16
+ model_name = "gpt-4o"
17
17
  self.binary_file = Path("janus/language/binary/_tests/hello")
18
- self.llm, _, _ = load_model(model_name)
18
+ self.llm, _, _, _ = load_model(model_name)
19
19
  self.splitter = BinarySplitter(model=self.llm)
20
20
  os.environ["GHIDRA_INSTALL_PATH"] = "~/programs/ghidra_10.4_PUBLIC"
21
21
 
@@ -7,11 +7,11 @@ from pathlib import Path
7
7
  import tree_sitter
8
8
  from langchain.schema.language_model import BaseLanguageModel
9
9
 
10
- from ...utils.enums import LANGUAGES
11
- from ...utils.logger import create_logger
12
- from ..block import CodeBlock
13
- from ..combine import Combiner
14
- from ..treesitter import TreeSitterSplitter
10
+ from janus.language.block import CodeBlock
11
+ from janus.language.combine import Combiner
12
+ from janus.language.treesitter import TreeSitterSplitter
13
+ from janus.utils.enums import LANGUAGES
14
+ from janus.utils.logger import create_logger
15
15
 
16
16
  log = create_logger(__name__)
17
17
 
@@ -1,8 +1,8 @@
1
1
  from functools import total_ordering
2
2
  from typing import ForwardRef, Hashable, Optional, Tuple
3
3
 
4
- from ..utils.logger import create_logger
5
- from .node import NodeType
4
+ from janus.language.node import NodeType
5
+ from janus.utils.logger import create_logger
6
6
 
7
7
  log = create_logger(__name__)
8
8
 
@@ -1,6 +1,6 @@
1
- from ..utils.logger import create_logger
2
- from .block import CodeBlock, TranslatedCodeBlock
3
- from .file import FileManager
1
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
2
+ from janus.language.file import FileManager
3
+ from janus.utils.logger import create_logger
4
4
 
5
5
  log = create_logger(__name__)
6
6
 
@@ -1,5 +1,5 @@
1
- from ..utils.enums import LANGUAGES
2
- from ..utils.logger import create_logger
1
+ from janus.utils.enums import LANGUAGES
2
+ from janus.utils.logger import create_logger
3
3
 
4
4
  log = create_logger(__name__)
5
5
 
@@ -1,9 +1,9 @@
1
1
  import unittest
2
2
  from pathlib import Path
3
3
 
4
- from ....llm import load_model
5
- from ...combine import Combiner
6
- from ..mumps import MumpsSplitter
4
+ from janus.language.combine import Combiner
5
+ from janus.language.mumps import MumpsSplitter
6
+ from janus.llm import load_model
7
7
 
8
8
 
9
9
  class TestMumpsSplitter(unittest.TestCase):
@@ -11,8 +11,8 @@ class TestMumpsSplitter(unittest.TestCase):
11
11
 
12
12
  def setUp(self):
13
13
  """Set up the tests."""
14
- model_name = "gpt-3.5-turbo-0125"
15
- llm, _, _ = load_model(model_name)
14
+ model_name = "gpt-4o"
15
+ llm, _, _, _ = load_model(model_name)
16
16
  self.splitter = MumpsSplitter(model=llm)
17
17
  self.combiner = Combiner(language="mumps")
18
18
  self.test_file = Path("janus/language/mumps/_tests/mumps.m")
@@ -2,11 +2,11 @@ import re
2
2
 
3
3
  from langchain.schema.language_model import BaseLanguageModel
4
4
 
5
- from ...utils.logger import create_logger
6
- from ..block import CodeBlock
7
- from ..combine import Combiner
8
- from ..node import NodeType
9
- from ..splitter import Splitter
5
+ from janus.language.block import CodeBlock
6
+ from janus.language.combine import Combiner
7
+ from janus.language.node import NodeType
8
+ from janus.language.splitter import Splitter
9
+ from janus.utils.logger import create_logger
10
10
 
11
11
  log = create_logger(__name__)
12
12