janus-llm 1.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74)
  1. janus/__init__.py +9 -1
  2. janus/__main__.py +4 -0
  3. janus/_tests/test_cli.py +128 -0
  4. janus/_tests/test_translate.py +49 -7
  5. janus/cli.py +530 -46
  6. janus/converter.py +50 -19
  7. janus/embedding/_tests/test_collections.py +2 -8
  8. janus/embedding/_tests/test_database.py +32 -0
  9. janus/embedding/_tests/test_vectorize.py +9 -4
  10. janus/embedding/collections.py +49 -6
  11. janus/embedding/embedding_models_info.py +130 -0
  12. janus/embedding/vectorize.py +53 -62
  13. janus/language/_tests/__init__.py +0 -0
  14. janus/language/_tests/test_combine.py +62 -0
  15. janus/language/_tests/test_splitter.py +16 -0
  16. janus/language/binary/_tests/test_binary.py +16 -1
  17. janus/language/binary/binary.py +10 -3
  18. janus/language/block.py +31 -30
  19. janus/language/combine.py +26 -34
  20. janus/language/mumps/_tests/test_mumps.py +2 -2
  21. janus/language/mumps/mumps.py +93 -9
  22. janus/language/naive/__init__.py +4 -0
  23. janus/language/naive/basic_splitter.py +14 -0
  24. janus/language/naive/chunk_splitter.py +26 -0
  25. janus/language/naive/registry.py +13 -0
  26. janus/language/naive/simple_ast.py +18 -0
  27. janus/language/naive/tag_splitter.py +61 -0
  28. janus/language/splitter.py +168 -74
  29. janus/language/treesitter/_tests/test_treesitter.py +19 -14
  30. janus/language/treesitter/treesitter.py +37 -13
  31. janus/llm/model_callbacks.py +177 -0
  32. janus/llm/models_info.py +165 -72
  33. janus/metrics/__init__.py +8 -0
  34. janus/metrics/_tests/__init__.py +0 -0
  35. janus/metrics/_tests/reference.py +2 -0
  36. janus/metrics/_tests/target.py +2 -0
  37. janus/metrics/_tests/test_bleu.py +56 -0
  38. janus/metrics/_tests/test_chrf.py +67 -0
  39. janus/metrics/_tests/test_file_pairing.py +59 -0
  40. janus/metrics/_tests/test_llm.py +91 -0
  41. janus/metrics/_tests/test_reading.py +28 -0
  42. janus/metrics/_tests/test_rouge_score.py +65 -0
  43. janus/metrics/_tests/test_similarity_score.py +23 -0
  44. janus/metrics/_tests/test_treesitter_metrics.py +110 -0
  45. janus/metrics/bleu.py +66 -0
  46. janus/metrics/chrf.py +55 -0
  47. janus/metrics/cli.py +7 -0
  48. janus/metrics/complexity_metrics.py +208 -0
  49. janus/metrics/file_pairing.py +113 -0
  50. janus/metrics/llm_metrics.py +202 -0
  51. janus/metrics/metric.py +466 -0
  52. janus/metrics/reading.py +70 -0
  53. janus/metrics/rouge_score.py +96 -0
  54. janus/metrics/similarity.py +53 -0
  55. janus/metrics/splitting.py +38 -0
  56. janus/parsers/_tests/__init__.py +0 -0
  57. janus/parsers/_tests/test_code_parser.py +32 -0
  58. janus/parsers/code_parser.py +24 -253
  59. janus/parsers/doc_parser.py +169 -0
  60. janus/parsers/eval_parser.py +80 -0
  61. janus/parsers/reqs_parser.py +72 -0
  62. janus/prompts/prompt.py +103 -30
  63. janus/translate.py +636 -111
  64. janus/utils/_tests/__init__.py +0 -0
  65. janus/utils/_tests/test_logger.py +67 -0
  66. janus/utils/_tests/test_progress.py +20 -0
  67. janus/utils/enums.py +56 -3
  68. janus/utils/progress.py +56 -0
  69. {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/METADATA +27 -11
  70. janus_llm-2.0.1.dist-info/RECORD +94 -0
  71. {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/WHEEL +1 -1
  72. janus_llm-1.0.0.dist-info/RECORD +0 -48
  73. {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/LICENSE +0 -0
  74. {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/entry_points.txt +0 -0
janus/__init__.py CHANGED
@@ -1,3 +1,11 @@
1
+ import warnings
2
+
3
+ from langchain_core._api.deprecation import LangChainDeprecationWarning
4
+
5
+ from .metrics import * # noqa: F403
1
6
  from .translate import Translator
2
7
 
3
- __version__ = "1.0.0"
8
+ __version__ = "2.0.1"
9
+
10
+ # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
+ warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
janus/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .cli import app
2
+
3
+ if __name__ == "__main__":
4
+ app()
janus/_tests/test_cli.py ADDED
@@ -0,0 +1,128 @@
1
+ import traceback
2
+ import unittest
3
+ from unittest.mock import ANY, patch
4
+
5
+ from typer.testing import CliRunner
6
+
7
+ from ..cli import app, translate
8
+ from ..embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
9
+ from ..llm.models_info import MODEL_CONFIG_DIR
10
+
11
+
12
+ class TestCli(unittest.TestCase):
13
+ def setUp(self):
14
+ self.runner = CliRunner()
15
+
16
+ def test_help(self):
17
+ result = self.runner.invoke(app, ["--help"])
18
+ self.assertEqual(result.exit_code, 0)
19
+
20
+ def test_version(self):
21
+ result = self.runner.invoke(app, ["--version"])
22
+ self.assertEqual(result.exit_code, 0)
23
+
24
+ def test_invalid_command(self):
25
+ result = self.runner.invoke(app, ["invalid_command"])
26
+ self.assertNotEqual(result.exit_code, 0)
27
+
28
+ def test_no_arguments(self):
29
+ result = self.runner.invoke(app)
30
+ self.assertEqual(result.exit_code, 0)
31
+
32
+ def test_translate_help(self):
33
+ result = self.runner.invoke(app, ["translate", "-h"])
34
+ self.assertEqual(result.exit_code, 0)
35
+
36
+ def test_document_help(self):
37
+ result = self.runner.invoke(app, ["document", "-h"])
38
+ self.assertEqual(result.exit_code, 0)
39
+
40
+ def test_db_help(self):
41
+ result = self.runner.invoke(app, ["db", "-h"])
42
+ self.assertEqual(result.exit_code, 0)
43
+
44
+ def test_llm_help(self):
45
+ result = self.runner.invoke(app, ["llm", "-h"])
46
+ self.assertEqual(result.exit_code, 0)
47
+
48
+ def test_llm_add(self):
49
+ llm_model_path = MODEL_CONFIG_DIR / "test-model-name.json"
50
+ if llm_model_path.exists():
51
+ llm_model_path.unlink()
52
+ result = self.runner.invoke(app, ["llm", "add", "test-model-name"])
53
+ self.assertEqual(result.exit_code, 0)
54
+ self.assertTrue(llm_model_path.exists())
55
+ llm_model_path.unlink()
56
+
57
+ def test_db_ls(self):
58
+ result = self.runner.invoke(app, ["db", "ls"])
59
+ self.assertEqual(result.exit_code, 0)
60
+
61
+ def test_db_status(self):
62
+ result = self.runner.invoke(app, ["db", "status"])
63
+ self.assertEqual(result.exit_code, 0)
64
+
65
+ def test_embedding_add(self):
66
+ embedding_model_path = (
67
+ EMBEDDING_MODEL_CONFIG_DIR / "test-embedding-model-name.json"
68
+ )
69
+ if embedding_model_path.exists():
70
+ embedding_model_path.unlink()
71
+ result = self.runner.invoke(
72
+ app, ["embedding", "add", "test-embedding-model-name"]
73
+ )
74
+ self.assertEqual(result.exit_code, 0)
75
+ self.assertTrue(embedding_model_path.exists())
76
+ embedding_model_path.unlink()
77
+
78
+ def test_db_add_and_rm(self):
79
+ embedding_model_path = (
80
+ EMBEDDING_MODEL_CONFIG_DIR / "test-embedding-model-name.json"
81
+ )
82
+ if embedding_model_path.exists():
83
+ embedding_model_path.unlink()
84
+ result = self.runner.invoke(
85
+ app,
86
+ ["embedding", "add", "test-embedding-model-name", "-t", "HuggingFaceLocal"],
87
+ )
88
+ self.assertEqual(result.exit_code, 0)
89
+ result = self.runner.invoke(
90
+ app,
91
+ [
92
+ "db",
93
+ "add",
94
+ "test-db-name",
95
+ "test-embedding-model-name",
96
+ "-i",
97
+ "janus/language/mumps",
98
+ ],
99
+ )
100
+ traceback.print_exception(result.exception)
101
+ embedding_model_path.unlink()
102
+ self.assertEqual(result.exit_code, 0)
103
+
104
+ result = self.runner.invoke(app, ["db", "rm", "test-db-name", "-y"])
105
+ self.assertEqual(result.exit_code, 0)
106
+
107
+ @patch("janus.translate.Translator.translate", autospec=True)
108
+ def test_translate(self, mock_translate):
109
+ # Arrange
110
+ mock_instance = mock_translate.return_value
111
+ mock_instance.translate.return_value = None # or whatever you expect
112
+
113
+ # Act
114
+ translate(
115
+ source_lang="matlab",
116
+ target_lang="python",
117
+ input_dir="janus/",
118
+ output_dir="janus/",
119
+ overwrite=True,
120
+ temp=0.7,
121
+ prompt_template="simple",
122
+ parser_type="code",
123
+ collection=None,
124
+ )
125
+
126
+ # Assert
127
+ mock_translate.assert_called_once()
128
+ mock_translate.assert_called_once_with(ANY, "janus/", "janus/", True, None)
janus/_tests/test_translate.py CHANGED
@@ -7,7 +7,8 @@ from langchain.schema import Document
7
7
  from langchain.schema.embeddings import Embeddings
8
8
  from langchain.schema.vectorstore import VST, VectorStore
9
9
 
10
- from ..translate import Translator
10
+ from ..language.block import CodeBlock, TranslatedCodeBlock
11
+ from ..translate import DiagramGenerator, Translator
11
12
 
12
13
  # from langchain.vectorstores import Chroma
13
14
 
@@ -75,7 +76,7 @@ class TestTranslator(unittest.TestCase):
75
76
  def setUp(self):
76
77
  """Set up the tests."""
77
78
  self.translator = Translator(
78
- model="gpt-3.5-turbo",
79
+ model="gpt-3.5-turbo-0125",
79
80
  source_language="fortran",
80
81
  target_language="python",
81
82
  target_version="3.10",
@@ -84,7 +85,7 @@ class TestTranslator(unittest.TestCase):
84
85
  self.TEST_FILE_EMBEDDING_COUNT = 14
85
86
 
86
87
  self.req_translator = Translator(
87
- model="gpt-3.5-turbo",
88
+ model="gpt-3.5-turbo-0125",
88
89
  # embeddings_override=MockEmbeddingsFactory(),
89
90
  source_language="fortran",
90
91
  target_language="text",
@@ -317,6 +318,47 @@ class TestTranslator(unittest.TestCase):
317
318
  self.assertRaises(ValueError, self.translator._load_parameters)
318
319
 
319
320
 
321
+ class TestDiagramGenerator(unittest.TestCase):
322
+ """Tests for the DiagramGenerator class."""
323
+
324
+ def setUp(self):
325
+ """Set up the tests."""
326
+ self.diagram_generator = DiagramGenerator(
327
+ model="gpt-3.5-turbo-0125",
328
+ source_language="fortran",
329
+ diagram_type="Activity",
330
+ )
331
+
332
+ def test_init(self):
333
+ """Test __init__ method."""
334
+ self.assertEqual(self.diagram_generator._model_name, "gpt-3.5-turbo-0125")
335
+ self.assertEqual(self.diagram_generator._source_language, "fortran")
336
+ self.assertEqual(self.diagram_generator._diagram_type, "Activity")
337
+
338
+ def test_add_translation(self):
339
+ """Test _add_translation method."""
340
+ block = TranslatedCodeBlock(
341
+ original=CodeBlock(
342
+ id="test",
343
+ name="Test Block",
344
+ node_type="function",
345
+ language="python",
346
+ text="print('Hello, World!')",
347
+ start_point=(0, 0),
348
+ end_point=(1, 0),
349
+ start_byte=0,
350
+ end_byte=1,
351
+ tokens=5,
352
+ children=[],
353
+ ),
354
+ language="python",
355
+ )
356
+ self.diagram_generator._add_translation(block)
357
+ self.assertTrue(block.translated)
358
+ self.assertIsNotNone(block.text)
359
+ self.assertIsNotNone(block.tokens)
360
+
361
+
320
362
  @pytest.mark.parametrize(
321
363
  "source_language,prompt_template,expected_target_language,expected_target_version,"
322
364
  "parser_type",
@@ -337,8 +379,8 @@ def test_language_combinations(
337
379
  """Tests that translator target language settings are consistent
338
380
  with prompt template expectations.
339
381
  """
340
- translator = Translator(model="gpt-3.5-turbo")
341
- translator.set_model("gpt-3.5-turbo-16k")
382
+ translator = Translator(model="gpt-3.5-turbo-0125")
383
+ translator.set_model("gpt-3.5-turbo-0125")
342
384
  translator.set_source_language(source_language)
343
385
  translator.set_target_language(expected_target_language, expected_target_version)
344
386
  translator.set_parser_type(parser_type)
@@ -348,5 +390,5 @@ def test_language_combinations(
348
390
  assert translator._target_version == expected_target_version # nosec
349
391
  assert translator._parser_type == parser_type # nosec
350
392
  assert translator._splitter.language == source_language # nosec
351
- assert translator._splitter.model.model_name == "gpt-3.5-turbo-16k" # nosec
352
- assert translator._prompt_engine._template_name == prompt_template # nosec
393
+ assert translator._splitter.model.model_name == "gpt-3.5-turbo-0125" # nosec
394
+ assert translator._prompt_template_name == prompt_template # nosec