janus-llm 1.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +9 -1
- janus/__main__.py +4 -0
- janus/_tests/test_cli.py +128 -0
- janus/_tests/test_translate.py +49 -7
- janus/cli.py +530 -46
- janus/converter.py +50 -19
- janus/embedding/_tests/test_collections.py +2 -8
- janus/embedding/_tests/test_database.py +32 -0
- janus/embedding/_tests/test_vectorize.py +9 -4
- janus/embedding/collections.py +49 -6
- janus/embedding/embedding_models_info.py +130 -0
- janus/embedding/vectorize.py +53 -62
- janus/language/_tests/__init__.py +0 -0
- janus/language/_tests/test_combine.py +62 -0
- janus/language/_tests/test_splitter.py +16 -0
- janus/language/binary/_tests/test_binary.py +16 -1
- janus/language/binary/binary.py +10 -3
- janus/language/block.py +31 -30
- janus/language/combine.py +26 -34
- janus/language/mumps/_tests/test_mumps.py +2 -2
- janus/language/mumps/mumps.py +93 -9
- janus/language/naive/__init__.py +4 -0
- janus/language/naive/basic_splitter.py +14 -0
- janus/language/naive/chunk_splitter.py +26 -0
- janus/language/naive/registry.py +13 -0
- janus/language/naive/simple_ast.py +18 -0
- janus/language/naive/tag_splitter.py +61 -0
- janus/language/splitter.py +168 -74
- janus/language/treesitter/_tests/test_treesitter.py +19 -14
- janus/language/treesitter/treesitter.py +37 -13
- janus/llm/model_callbacks.py +177 -0
- janus/llm/models_info.py +165 -72
- janus/metrics/__init__.py +8 -0
- janus/metrics/_tests/__init__.py +0 -0
- janus/metrics/_tests/reference.py +2 -0
- janus/metrics/_tests/target.py +2 -0
- janus/metrics/_tests/test_bleu.py +56 -0
- janus/metrics/_tests/test_chrf.py +67 -0
- janus/metrics/_tests/test_file_pairing.py +59 -0
- janus/metrics/_tests/test_llm.py +91 -0
- janus/metrics/_tests/test_reading.py +28 -0
- janus/metrics/_tests/test_rouge_score.py +65 -0
- janus/metrics/_tests/test_similarity_score.py +23 -0
- janus/metrics/_tests/test_treesitter_metrics.py +110 -0
- janus/metrics/bleu.py +66 -0
- janus/metrics/chrf.py +55 -0
- janus/metrics/cli.py +7 -0
- janus/metrics/complexity_metrics.py +208 -0
- janus/metrics/file_pairing.py +113 -0
- janus/metrics/llm_metrics.py +202 -0
- janus/metrics/metric.py +466 -0
- janus/metrics/reading.py +70 -0
- janus/metrics/rouge_score.py +96 -0
- janus/metrics/similarity.py +53 -0
- janus/metrics/splitting.py +38 -0
- janus/parsers/_tests/__init__.py +0 -0
- janus/parsers/_tests/test_code_parser.py +32 -0
- janus/parsers/code_parser.py +24 -253
- janus/parsers/doc_parser.py +169 -0
- janus/parsers/eval_parser.py +80 -0
- janus/parsers/reqs_parser.py +72 -0
- janus/prompts/prompt.py +103 -30
- janus/translate.py +636 -111
- janus/utils/_tests/__init__.py +0 -0
- janus/utils/_tests/test_logger.py +67 -0
- janus/utils/_tests/test_progress.py +20 -0
- janus/utils/enums.py +56 -3
- janus/utils/progress.py +56 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/METADATA +27 -11
- janus_llm-2.0.1.dist-info/RECORD +94 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/WHEEL +1 -1
- janus_llm-1.0.0.dist-info/RECORD +0 -48
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/LICENSE +0 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
import warnings
|
2
|
+
|
3
|
+
from langchain_core._api.deprecation import LangChainDeprecationWarning
|
4
|
+
|
5
|
+
from .metrics import * # noqa: F403
|
1
6
|
from .translate import Translator
|
2
7
|
|
3
|
-
__version__ = "1.0.0"
|
8
|
+
__version__ = "2.0.1"
|
9
|
+
|
10
|
+
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
|
+
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/__main__.py
ADDED
janus/_tests/test_cli.py
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
import traceback
|
2
|
+
import unittest
|
3
|
+
from unittest.mock import ANY, patch
|
4
|
+
|
5
|
+
from typer.testing import CliRunner
|
6
|
+
|
7
|
+
from ..cli import app, translate
|
8
|
+
from ..embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
|
9
|
+
from ..llm.models_info import MODEL_CONFIG_DIR
|
10
|
+
|
11
|
+
|
12
|
+
class TestCli(unittest.TestCase):
|
13
|
+
def setUp(self):
|
14
|
+
self.runner = CliRunner()
|
15
|
+
|
16
|
+
def test_help(self):
|
17
|
+
result = self.runner.invoke(app, ["--help"])
|
18
|
+
self.assertEqual(result.exit_code, 0)
|
19
|
+
|
20
|
+
def test_version(self):
|
21
|
+
result = self.runner.invoke(app, ["--version"])
|
22
|
+
self.assertEqual(result.exit_code, 0)
|
23
|
+
|
24
|
+
def test_invalid_command(self):
|
25
|
+
result = self.runner.invoke(app, ["invalid_command"])
|
26
|
+
self.assertNotEqual(result.exit_code, 0)
|
27
|
+
|
28
|
+
def test_no_arguments(self):
|
29
|
+
result = self.runner.invoke(app)
|
30
|
+
self.assertEqual(result.exit_code, 0)
|
31
|
+
|
32
|
+
def test_translate_help(self):
|
33
|
+
result = self.runner.invoke(app, ["translate", "-h"])
|
34
|
+
self.assertEqual(result.exit_code, 0)
|
35
|
+
|
36
|
+
def test_document_help(self):
|
37
|
+
result = self.runner.invoke(app, ["document", "-h"])
|
38
|
+
self.assertEqual(result.exit_code, 0)
|
39
|
+
|
40
|
+
def test_db_help(self):
|
41
|
+
result = self.runner.invoke(app, ["db", "-h"])
|
42
|
+
self.assertEqual(result.exit_code, 0)
|
43
|
+
|
44
|
+
def test_llm_help(self):
|
45
|
+
result = self.runner.invoke(app, ["llm", "-h"])
|
46
|
+
self.assertEqual(result.exit_code, 0)
|
47
|
+
|
48
|
+
def test_llm_add(self):
|
49
|
+
llm_model_path = MODEL_CONFIG_DIR / "test-model-name.json"
|
50
|
+
if llm_model_path.exists():
|
51
|
+
llm_model_path.unlink()
|
52
|
+
result = self.runner.invoke(app, ["llm", "add", "test-model-name"])
|
53
|
+
self.assertEqual(result.exit_code, 0)
|
54
|
+
self.assertTrue(llm_model_path.exists())
|
55
|
+
llm_model_path.unlink()
|
56
|
+
|
57
|
+
def test_db_ls(self):
|
58
|
+
result = self.runner.invoke(app, ["db", "ls"])
|
59
|
+
self.assertEqual(result.exit_code, 0)
|
60
|
+
|
61
|
+
def test_db_status(self):
|
62
|
+
result = self.runner.invoke(app, ["db", "status"])
|
63
|
+
self.assertEqual(result.exit_code, 0)
|
64
|
+
|
65
|
+
def test_embedding_add(self):
|
66
|
+
embedding_model_path = (
|
67
|
+
EMBEDDING_MODEL_CONFIG_DIR / "test-embedding-model-name.json"
|
68
|
+
)
|
69
|
+
if embedding_model_path.exists():
|
70
|
+
embedding_model_path.unlink()
|
71
|
+
result = self.runner.invoke(
|
72
|
+
app, ["embedding", "add", "test-embedding-model-name"]
|
73
|
+
)
|
74
|
+
self.assertEqual(result.exit_code, 0)
|
75
|
+
self.assertTrue(embedding_model_path.exists())
|
76
|
+
embedding_model_path.unlink()
|
77
|
+
|
78
|
+
def test_db_add_and_rm(self):
|
79
|
+
embedding_model_path = (
|
80
|
+
EMBEDDING_MODEL_CONFIG_DIR / "test-embedding-model-name.json"
|
81
|
+
)
|
82
|
+
if embedding_model_path.exists():
|
83
|
+
embedding_model_path.unlink()
|
84
|
+
result = self.runner.invoke(
|
85
|
+
app,
|
86
|
+
["embedding", "add", "test-embedding-model-name", "-t", "HuggingFaceLocal"],
|
87
|
+
)
|
88
|
+
self.assertEqual(result.exit_code, 0)
|
89
|
+
result = self.runner.invoke(
|
90
|
+
app,
|
91
|
+
[
|
92
|
+
"db",
|
93
|
+
"add",
|
94
|
+
"test-db-name",
|
95
|
+
"test-embedding-model-name",
|
96
|
+
"-i",
|
97
|
+
"janus/language/mumps",
|
98
|
+
],
|
99
|
+
)
|
100
|
+
traceback.print_exception(result.exception)
|
101
|
+
embedding_model_path.unlink()
|
102
|
+
self.assertEqual(result.exit_code, 0)
|
103
|
+
|
104
|
+
result = self.runner.invoke(app, ["db", "rm", "test-db-name", "-y"])
|
105
|
+
self.assertEqual(result.exit_code, 0)
|
106
|
+
|
107
|
+
@patch("janus.translate.Translator.translate", autospec=True)
|
108
|
+
def test_translate(self, mock_translate):
|
109
|
+
# Arrange
|
110
|
+
mock_instance = mock_translate.return_value
|
111
|
+
mock_instance.translate.return_value = None # or whatever you expect
|
112
|
+
|
113
|
+
# Act
|
114
|
+
translate(
|
115
|
+
source_lang="matlab",
|
116
|
+
target_lang="python",
|
117
|
+
input_dir="janus/",
|
118
|
+
output_dir="janus/",
|
119
|
+
overwrite=True,
|
120
|
+
temp=0.7,
|
121
|
+
prompt_template="simple",
|
122
|
+
parser_type="code",
|
123
|
+
collection=None,
|
124
|
+
)
|
125
|
+
|
126
|
+
# Assert
|
127
|
+
mock_translate.assert_called_once()
|
128
|
+
mock_translate.assert_called_once_with(ANY, "janus/", "janus/", True, None)
|
janus/_tests/test_translate.py
CHANGED
@@ -7,7 +7,8 @@ from langchain.schema import Document
|
|
7
7
|
from langchain.schema.embeddings import Embeddings
|
8
8
|
from langchain.schema.vectorstore import VST, VectorStore
|
9
9
|
|
10
|
-
from ..translate import Translator
|
10
|
+
from ..language.block import CodeBlock, TranslatedCodeBlock
|
11
|
+
from ..translate import DiagramGenerator, Translator
|
11
12
|
|
12
13
|
# from langchain.vectorstores import Chroma
|
13
14
|
|
@@ -75,7 +76,7 @@ class TestTranslator(unittest.TestCase):
|
|
75
76
|
def setUp(self):
|
76
77
|
"""Set up the tests."""
|
77
78
|
self.translator = Translator(
|
78
|
-
model="gpt-3.5-turbo",
|
79
|
+
model="gpt-3.5-turbo-0125",
|
79
80
|
source_language="fortran",
|
80
81
|
target_language="python",
|
81
82
|
target_version="3.10",
|
@@ -84,7 +85,7 @@ class TestTranslator(unittest.TestCase):
|
|
84
85
|
self.TEST_FILE_EMBEDDING_COUNT = 14
|
85
86
|
|
86
87
|
self.req_translator = Translator(
|
87
|
-
model="gpt-3.5-turbo",
|
88
|
+
model="gpt-3.5-turbo-0125",
|
88
89
|
# embeddings_override=MockEmbeddingsFactory(),
|
89
90
|
source_language="fortran",
|
90
91
|
target_language="text",
|
@@ -317,6 +318,47 @@ class TestTranslator(unittest.TestCase):
|
|
317
318
|
self.assertRaises(ValueError, self.translator._load_parameters)
|
318
319
|
|
319
320
|
|
321
|
+
class TestDiagramGenerator(unittest.TestCase):
|
322
|
+
"""Tests for the DiagramGenerator class."""
|
323
|
+
|
324
|
+
def setUp(self):
|
325
|
+
"""Set up the tests."""
|
326
|
+
self.diagram_generator = DiagramGenerator(
|
327
|
+
model="gpt-3.5-turbo-0125",
|
328
|
+
source_language="fortran",
|
329
|
+
diagram_type="Activity",
|
330
|
+
)
|
331
|
+
|
332
|
+
def test_init(self):
|
333
|
+
"""Test __init__ method."""
|
334
|
+
self.assertEqual(self.diagram_generator._model_name, "gpt-3.5-turbo-0125")
|
335
|
+
self.assertEqual(self.diagram_generator._source_language, "fortran")
|
336
|
+
self.assertEqual(self.diagram_generator._diagram_type, "Activity")
|
337
|
+
|
338
|
+
def test_add_translation(self):
|
339
|
+
"""Test _add_translation method."""
|
340
|
+
block = TranslatedCodeBlock(
|
341
|
+
original=CodeBlock(
|
342
|
+
id="test",
|
343
|
+
name="Test Block",
|
344
|
+
node_type="function",
|
345
|
+
language="python",
|
346
|
+
text="print('Hello, World!')",
|
347
|
+
start_point=(0, 0),
|
348
|
+
end_point=(1, 0),
|
349
|
+
start_byte=0,
|
350
|
+
end_byte=1,
|
351
|
+
tokens=5,
|
352
|
+
children=[],
|
353
|
+
),
|
354
|
+
language="python",
|
355
|
+
)
|
356
|
+
self.diagram_generator._add_translation(block)
|
357
|
+
self.assertTrue(block.translated)
|
358
|
+
self.assertIsNotNone(block.text)
|
359
|
+
self.assertIsNotNone(block.tokens)
|
360
|
+
|
361
|
+
|
320
362
|
@pytest.mark.parametrize(
|
321
363
|
"source_language,prompt_template,expected_target_language,expected_target_version,"
|
322
364
|
"parser_type",
|
@@ -337,8 +379,8 @@ def test_language_combinations(
|
|
337
379
|
"""Tests that translator target language settings are consistent
|
338
380
|
with prompt template expectations.
|
339
381
|
"""
|
340
|
-
translator = Translator(model="gpt-3.5-turbo")
|
341
|
-
translator.set_model("gpt-3.5-turbo-
|
382
|
+
translator = Translator(model="gpt-3.5-turbo-0125")
|
383
|
+
translator.set_model("gpt-3.5-turbo-0125")
|
342
384
|
translator.set_source_language(source_language)
|
343
385
|
translator.set_target_language(expected_target_language, expected_target_version)
|
344
386
|
translator.set_parser_type(parser_type)
|
@@ -348,5 +390,5 @@ def test_language_combinations(
|
|
348
390
|
assert translator._target_version == expected_target_version # nosec
|
349
391
|
assert translator._parser_type == parser_type # nosec
|
350
392
|
assert translator._splitter.language == source_language # nosec
|
351
|
-
assert translator._splitter.model.model_name == "gpt-3.5-turbo-
|
352
|
-
assert translator.
|
393
|
+
assert translator._splitter.model.model_name == "gpt-3.5-turbo-0125" # nosec
|
394
|
+
assert translator._prompt_template_name == prompt_template # nosec
|