janus-llm 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +9 -1
- janus/__main__.py +4 -0
- janus/_tests/test_cli.py +128 -0
- janus/_tests/test_translate.py +49 -7
- janus/cli.py +530 -46
- janus/converter.py +50 -19
- janus/embedding/_tests/test_collections.py +2 -8
- janus/embedding/_tests/test_database.py +32 -0
- janus/embedding/_tests/test_vectorize.py +9 -4
- janus/embedding/collections.py +49 -6
- janus/embedding/embedding_models_info.py +120 -0
- janus/embedding/vectorize.py +53 -62
- janus/language/_tests/__init__.py +0 -0
- janus/language/_tests/test_combine.py +62 -0
- janus/language/_tests/test_splitter.py +16 -0
- janus/language/binary/_tests/test_binary.py +16 -1
- janus/language/binary/binary.py +10 -3
- janus/language/block.py +31 -30
- janus/language/combine.py +26 -34
- janus/language/mumps/_tests/test_mumps.py +2 -2
- janus/language/mumps/mumps.py +93 -9
- janus/language/naive/__init__.py +4 -0
- janus/language/naive/basic_splitter.py +14 -0
- janus/language/naive/chunk_splitter.py +26 -0
- janus/language/naive/registry.py +13 -0
- janus/language/naive/simple_ast.py +18 -0
- janus/language/naive/tag_splitter.py +61 -0
- janus/language/splitter.py +168 -74
- janus/language/treesitter/_tests/test_treesitter.py +9 -6
- janus/language/treesitter/treesitter.py +37 -13
- janus/llm/model_callbacks.py +177 -0
- janus/llm/models_info.py +134 -70
- janus/metrics/__init__.py +8 -0
- janus/metrics/_tests/__init__.py +0 -0
- janus/metrics/_tests/reference.py +2 -0
- janus/metrics/_tests/target.py +2 -0
- janus/metrics/_tests/test_bleu.py +56 -0
- janus/metrics/_tests/test_chrf.py +67 -0
- janus/metrics/_tests/test_file_pairing.py +59 -0
- janus/metrics/_tests/test_llm.py +91 -0
- janus/metrics/_tests/test_reading.py +28 -0
- janus/metrics/_tests/test_rouge_score.py +65 -0
- janus/metrics/_tests/test_similarity_score.py +23 -0
- janus/metrics/_tests/test_treesitter_metrics.py +110 -0
- janus/metrics/bleu.py +66 -0
- janus/metrics/chrf.py +55 -0
- janus/metrics/cli.py +7 -0
- janus/metrics/complexity_metrics.py +208 -0
- janus/metrics/file_pairing.py +113 -0
- janus/metrics/llm_metrics.py +202 -0
- janus/metrics/metric.py +466 -0
- janus/metrics/reading.py +70 -0
- janus/metrics/rouge_score.py +96 -0
- janus/metrics/similarity.py +53 -0
- janus/metrics/splitting.py +38 -0
- janus/parsers/_tests/__init__.py +0 -0
- janus/parsers/_tests/test_code_parser.py +32 -0
- janus/parsers/code_parser.py +24 -253
- janus/parsers/doc_parser.py +169 -0
- janus/parsers/eval_parser.py +80 -0
- janus/parsers/reqs_parser.py +72 -0
- janus/prompts/prompt.py +103 -30
- janus/translate.py +636 -111
- janus/utils/_tests/__init__.py +0 -0
- janus/utils/_tests/test_logger.py +67 -0
- janus/utils/_tests/test_progress.py +20 -0
- janus/utils/enums.py +56 -3
- janus/utils/progress.py +56 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/METADATA +23 -10
- janus_llm-2.0.0.dist-info/RECORD +94 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/WHEEL +1 -1
- janus_llm-1.0.0.dist-info/RECORD +0 -48
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/LICENSE +0 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
import warnings
|
2
|
+
|
3
|
+
from langchain_core._api.deprecation import LangChainDeprecationWarning
|
4
|
+
|
5
|
+
from .metrics import * # noqa: F403
|
1
6
|
from .translate import Translator
|
2
7
|
|
3
|
-
__version__ = "
|
8
|
+
__version__ = "2.0.0"
|
9
|
+
|
10
|
+
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
|
+
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/__main__.py
ADDED
janus/_tests/test_cli.py
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
import traceback
|
2
|
+
import unittest
|
3
|
+
from unittest.mock import ANY, patch
|
4
|
+
|
5
|
+
from typer.testing import CliRunner
|
6
|
+
|
7
|
+
from ..cli import app, translate
|
8
|
+
from ..embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
|
9
|
+
from ..llm.models_info import MODEL_CONFIG_DIR
|
10
|
+
|
11
|
+
|
12
|
+
class TestCli(unittest.TestCase):
|
13
|
+
def setUp(self):
|
14
|
+
self.runner = CliRunner()
|
15
|
+
|
16
|
+
def test_help(self):
|
17
|
+
result = self.runner.invoke(app, ["--help"])
|
18
|
+
self.assertEqual(result.exit_code, 0)
|
19
|
+
|
20
|
+
def test_version(self):
|
21
|
+
result = self.runner.invoke(app, ["--version"])
|
22
|
+
self.assertEqual(result.exit_code, 0)
|
23
|
+
|
24
|
+
def test_invalid_command(self):
|
25
|
+
result = self.runner.invoke(app, ["invalid_command"])
|
26
|
+
self.assertNotEqual(result.exit_code, 0)
|
27
|
+
|
28
|
+
def test_no_arguments(self):
|
29
|
+
result = self.runner.invoke(app)
|
30
|
+
self.assertEqual(result.exit_code, 0)
|
31
|
+
|
32
|
+
def test_translate_help(self):
|
33
|
+
result = self.runner.invoke(app, ["translate", "-h"])
|
34
|
+
self.assertEqual(result.exit_code, 0)
|
35
|
+
|
36
|
+
def test_document_help(self):
|
37
|
+
result = self.runner.invoke(app, ["document", "-h"])
|
38
|
+
self.assertEqual(result.exit_code, 0)
|
39
|
+
|
40
|
+
def test_db_help(self):
|
41
|
+
result = self.runner.invoke(app, ["db", "-h"])
|
42
|
+
self.assertEqual(result.exit_code, 0)
|
43
|
+
|
44
|
+
def test_llm_help(self):
|
45
|
+
result = self.runner.invoke(app, ["llm", "-h"])
|
46
|
+
self.assertEqual(result.exit_code, 0)
|
47
|
+
|
48
|
+
def test_llm_add(self):
|
49
|
+
llm_model_path = MODEL_CONFIG_DIR / "test-model-name.json"
|
50
|
+
if llm_model_path.exists():
|
51
|
+
llm_model_path.unlink()
|
52
|
+
result = self.runner.invoke(app, ["llm", "add", "test-model-name"])
|
53
|
+
self.assertEqual(result.exit_code, 0)
|
54
|
+
self.assertTrue(llm_model_path.exists())
|
55
|
+
llm_model_path.unlink()
|
56
|
+
|
57
|
+
def test_db_ls(self):
|
58
|
+
result = self.runner.invoke(app, ["db", "ls"])
|
59
|
+
self.assertEqual(result.exit_code, 0)
|
60
|
+
|
61
|
+
def test_db_status(self):
|
62
|
+
result = self.runner.invoke(app, ["db", "status"])
|
63
|
+
self.assertEqual(result.exit_code, 0)
|
64
|
+
|
65
|
+
def test_embedding_add(self):
|
66
|
+
embedding_model_path = (
|
67
|
+
EMBEDDING_MODEL_CONFIG_DIR / "test-embedding-model-name.json"
|
68
|
+
)
|
69
|
+
if embedding_model_path.exists():
|
70
|
+
embedding_model_path.unlink()
|
71
|
+
result = self.runner.invoke(
|
72
|
+
app, ["embedding", "add", "test-embedding-model-name"]
|
73
|
+
)
|
74
|
+
self.assertEqual(result.exit_code, 0)
|
75
|
+
self.assertTrue(embedding_model_path.exists())
|
76
|
+
embedding_model_path.unlink()
|
77
|
+
|
78
|
+
def test_db_add_and_rm(self):
|
79
|
+
embedding_model_path = (
|
80
|
+
EMBEDDING_MODEL_CONFIG_DIR / "test-embedding-model-name.json"
|
81
|
+
)
|
82
|
+
if embedding_model_path.exists():
|
83
|
+
embedding_model_path.unlink()
|
84
|
+
result = self.runner.invoke(
|
85
|
+
app,
|
86
|
+
["embedding", "add", "test-embedding-model-name", "-t", "HuggingFaceLocal"],
|
87
|
+
)
|
88
|
+
self.assertEqual(result.exit_code, 0)
|
89
|
+
result = self.runner.invoke(
|
90
|
+
app,
|
91
|
+
[
|
92
|
+
"db",
|
93
|
+
"add",
|
94
|
+
"test-db-name",
|
95
|
+
"test-embedding-model-name",
|
96
|
+
"-i",
|
97
|
+
"janus/language/mumps",
|
98
|
+
],
|
99
|
+
)
|
100
|
+
traceback.print_exception(result.exception)
|
101
|
+
embedding_model_path.unlink()
|
102
|
+
self.assertEqual(result.exit_code, 0)
|
103
|
+
|
104
|
+
result = self.runner.invoke(app, ["db", "rm", "test-db-name", "-y"])
|
105
|
+
self.assertEqual(result.exit_code, 0)
|
106
|
+
|
107
|
+
@patch("janus.translate.Translator.translate", autospec=True)
|
108
|
+
def test_translate(self, mock_translate):
|
109
|
+
# Arrange
|
110
|
+
mock_instance = mock_translate.return_value
|
111
|
+
mock_instance.translate.return_value = None # or whatever you expect
|
112
|
+
|
113
|
+
# Act
|
114
|
+
translate(
|
115
|
+
source_lang="matlab",
|
116
|
+
target_lang="python",
|
117
|
+
input_dir="janus/",
|
118
|
+
output_dir="janus/",
|
119
|
+
overwrite=True,
|
120
|
+
temp=0.7,
|
121
|
+
prompt_template="simple",
|
122
|
+
parser_type="code",
|
123
|
+
collection=None,
|
124
|
+
)
|
125
|
+
|
126
|
+
# Assert
|
127
|
+
mock_translate.assert_called_once()
|
128
|
+
mock_translate.assert_called_once_with(ANY, "janus/", "janus/", True, None)
|
janus/_tests/test_translate.py
CHANGED
@@ -7,7 +7,8 @@ from langchain.schema import Document
|
|
7
7
|
from langchain.schema.embeddings import Embeddings
|
8
8
|
from langchain.schema.vectorstore import VST, VectorStore
|
9
9
|
|
10
|
-
from ..
|
10
|
+
from ..language.block import CodeBlock, TranslatedCodeBlock
|
11
|
+
from ..translate import DiagramGenerator, Translator
|
11
12
|
|
12
13
|
# from langchain.vectorstores import Chroma
|
13
14
|
|
@@ -75,7 +76,7 @@ class TestTranslator(unittest.TestCase):
|
|
75
76
|
def setUp(self):
|
76
77
|
"""Set up the tests."""
|
77
78
|
self.translator = Translator(
|
78
|
-
model="gpt-3.5-turbo",
|
79
|
+
model="gpt-3.5-turbo-0125",
|
79
80
|
source_language="fortran",
|
80
81
|
target_language="python",
|
81
82
|
target_version="3.10",
|
@@ -84,7 +85,7 @@ class TestTranslator(unittest.TestCase):
|
|
84
85
|
self.TEST_FILE_EMBEDDING_COUNT = 14
|
85
86
|
|
86
87
|
self.req_translator = Translator(
|
87
|
-
model="gpt-3.5-turbo",
|
88
|
+
model="gpt-3.5-turbo-0125",
|
88
89
|
# embeddings_override=MockEmbeddingsFactory(),
|
89
90
|
source_language="fortran",
|
90
91
|
target_language="text",
|
@@ -317,6 +318,47 @@ class TestTranslator(unittest.TestCase):
|
|
317
318
|
self.assertRaises(ValueError, self.translator._load_parameters)
|
318
319
|
|
319
320
|
|
321
|
+
class TestDiagramGenerator(unittest.TestCase):
|
322
|
+
"""Tests for the DiagramGenerator class."""
|
323
|
+
|
324
|
+
def setUp(self):
|
325
|
+
"""Set up the tests."""
|
326
|
+
self.diagram_generator = DiagramGenerator(
|
327
|
+
model="gpt-3.5-turbo-0125",
|
328
|
+
source_language="fortran",
|
329
|
+
diagram_type="Activity",
|
330
|
+
)
|
331
|
+
|
332
|
+
def test_init(self):
|
333
|
+
"""Test __init__ method."""
|
334
|
+
self.assertEqual(self.diagram_generator._model_name, "gpt-3.5-turbo-0125")
|
335
|
+
self.assertEqual(self.diagram_generator._source_language, "fortran")
|
336
|
+
self.assertEqual(self.diagram_generator._diagram_type, "Activity")
|
337
|
+
|
338
|
+
def test_add_translation(self):
|
339
|
+
"""Test _add_translation method."""
|
340
|
+
block = TranslatedCodeBlock(
|
341
|
+
original=CodeBlock(
|
342
|
+
id="test",
|
343
|
+
name="Test Block",
|
344
|
+
node_type="function",
|
345
|
+
language="python",
|
346
|
+
text="print('Hello, World!')",
|
347
|
+
start_point=(0, 0),
|
348
|
+
end_point=(1, 0),
|
349
|
+
start_byte=0,
|
350
|
+
end_byte=1,
|
351
|
+
tokens=5,
|
352
|
+
children=[],
|
353
|
+
),
|
354
|
+
language="python",
|
355
|
+
)
|
356
|
+
self.diagram_generator._add_translation(block)
|
357
|
+
self.assertTrue(block.translated)
|
358
|
+
self.assertIsNotNone(block.text)
|
359
|
+
self.assertIsNotNone(block.tokens)
|
360
|
+
|
361
|
+
|
320
362
|
@pytest.mark.parametrize(
|
321
363
|
"source_language,prompt_template,expected_target_language,expected_target_version,"
|
322
364
|
"parser_type",
|
@@ -337,8 +379,8 @@ def test_language_combinations(
|
|
337
379
|
"""Tests that translator target language settings are consistent
|
338
380
|
with prompt template expectations.
|
339
381
|
"""
|
340
|
-
translator = Translator(model="gpt-3.5-turbo")
|
341
|
-
translator.set_model("gpt-3.5-turbo-
|
382
|
+
translator = Translator(model="gpt-3.5-turbo-0125")
|
383
|
+
translator.set_model("gpt-3.5-turbo-0125")
|
342
384
|
translator.set_source_language(source_language)
|
343
385
|
translator.set_target_language(expected_target_language, expected_target_version)
|
344
386
|
translator.set_parser_type(parser_type)
|
@@ -348,5 +390,5 @@ def test_language_combinations(
|
|
348
390
|
assert translator._target_version == expected_target_version # nosec
|
349
391
|
assert translator._parser_type == parser_type # nosec
|
350
392
|
assert translator._splitter.language == source_language # nosec
|
351
|
-
assert translator._splitter.model.model_name == "gpt-3.5-turbo-
|
352
|
-
assert translator.
|
393
|
+
assert translator._splitter.model.model_name == "gpt-3.5-turbo-0125" # nosec
|
394
|
+
assert translator._prompt_template_name == prompt_template # nosec
|