janus-llm 2.1.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +2 -2
- janus/__main__.py +1 -1
- janus/_tests/test_cli.py +1 -2
- janus/cli.py +43 -50
- janus/converter/__init__.py +6 -0
- janus/converter/_tests/__init__.py +0 -0
- janus/{_tests → converter/_tests}/test_translate.py +11 -22
- janus/converter/converter.py +614 -0
- janus/converter/diagram.py +124 -0
- janus/converter/document.py +131 -0
- janus/converter/evaluate.py +15 -0
- janus/converter/requirements.py +50 -0
- janus/converter/translate.py +108 -0
- janus/language/block.py +1 -1
- janus/language/combine.py +0 -1
- janus/language/treesitter/treesitter.py +20 -1
- janus/llm/model_callbacks.py +33 -36
- janus/llm/models_info.py +14 -0
- janus/metrics/reading.py +27 -5
- janus/prompts/prompt.py +37 -11
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.0.dist-info}/METADATA +1 -1
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.0.dist-info}/RECORD +25 -19
- janus/converter.py +0 -161
- janus/translate.py +0 -987
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.0.dist-info}/LICENSE +0 -0
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.0.dist-info}/WHEEL +0 -0
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.0.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -2,10 +2,10 @@ import warnings
|
|
2
2
|
|
3
3
|
from langchain_core._api.deprecation import LangChainDeprecationWarning
|
4
4
|
|
5
|
+
from .converter.translate import Translator
|
5
6
|
from .metrics import * # noqa: F403
|
6
|
-
from .translate import Translator
|
7
7
|
|
8
|
-
__version__ = "
|
8
|
+
__version__ = "3.0.0"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/__main__.py
CHANGED
janus/_tests/test_cli.py
CHANGED
@@ -104,7 +104,7 @@ class TestCli(unittest.TestCase):
|
|
104
104
|
result = self.runner.invoke(app, ["db", "rm", "test-db-name", "-y"])
|
105
105
|
self.assertEqual(result.exit_code, 0)
|
106
106
|
|
107
|
-
@patch("janus.translate.Translator.translate", autospec=True)
|
107
|
+
@patch("janus.converter.translate.Translator.translate", autospec=True)
|
108
108
|
def test_translate(self, mock_translate):
|
109
109
|
# Arrange
|
110
110
|
mock_instance = mock_translate.return_value
|
@@ -119,7 +119,6 @@ class TestCli(unittest.TestCase):
|
|
119
119
|
overwrite=True,
|
120
120
|
temp=0.7,
|
121
121
|
prompt_template="simple",
|
122
|
-
parser_type="code",
|
123
122
|
collection=None,
|
124
123
|
)
|
125
124
|
|
janus/cli.py
CHANGED
@@ -12,33 +12,32 @@ from rich.console import Console
|
|
12
12
|
from rich.prompt import Confirm
|
13
13
|
from typing_extensions import Annotated
|
14
14
|
|
15
|
-
from .
|
16
|
-
from .
|
17
|
-
from .
|
15
|
+
from janus.converter.diagram import DiagramGenerator
|
16
|
+
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
17
|
+
from janus.converter.requirements import RequirementsDocumenter
|
18
|
+
from janus.converter.translate import Translator
|
19
|
+
from janus.embedding.collections import Collections
|
20
|
+
from janus.embedding.database import ChromaEmbeddingDatabase
|
21
|
+
from janus.embedding.embedding_models_info import (
|
18
22
|
EMBEDDING_COST_PER_MODEL,
|
19
23
|
EMBEDDING_MODEL_CONFIG_DIR,
|
20
24
|
EMBEDDING_TOKEN_LIMITS,
|
21
25
|
EmbeddingModelType,
|
22
26
|
)
|
23
|
-
from .embedding.vectorize import ChromaDBVectorizer
|
24
|
-
from .language.binary import BinarySplitter
|
25
|
-
from .language.mumps import MumpsSplitter
|
26
|
-
from .language.naive.registry import CUSTOM_SPLITTERS
|
27
|
-
from .language.treesitter import TreeSitterSplitter
|
28
|
-
from .llm.model_callbacks import COST_PER_1K_TOKENS
|
29
|
-
from .llm.models_info import
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
DiagramGenerator,
|
34
|
-
Documenter,
|
35
|
-
MadLibsDocumenter,
|
36
|
-
MultiDocumenter,
|
37
|
-
RequirementsDocumenter,
|
38
|
-
Translator,
|
27
|
+
from janus.embedding.vectorize import ChromaDBVectorizer
|
28
|
+
from janus.language.binary import BinarySplitter
|
29
|
+
from janus.language.mumps import MumpsSplitter
|
30
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
31
|
+
from janus.language.treesitter import TreeSitterSplitter
|
32
|
+
from janus.llm.model_callbacks import COST_PER_1K_TOKENS
|
33
|
+
from janus.llm.models_info import (
|
34
|
+
MODEL_CONFIG_DIR,
|
35
|
+
MODEL_TYPE_CONSTRUCTORS,
|
36
|
+
TOKEN_LIMITS,
|
39
37
|
)
|
40
|
-
from .
|
41
|
-
from .utils.
|
38
|
+
from janus.metrics.cli import evaluate
|
39
|
+
from janus.utils.enums import LANGUAGES
|
40
|
+
from janus.utils.logger import create_logger
|
42
41
|
|
43
42
|
httpx_logger = logging.getLogger("httpx")
|
44
43
|
httpx_logger.setLevel(logging.WARNING)
|
@@ -71,10 +70,14 @@ def get_collections_config():
|
|
71
70
|
|
72
71
|
|
73
72
|
app = typer.Typer(
|
74
|
-
help=
|
73
|
+
help=(
|
74
|
+
"[bold][dark_orange]Janus[/dark_orange] is a CLI for translating, "
|
75
|
+
"documenting, and diagramming code using large language models.[/bold]"
|
76
|
+
),
|
75
77
|
add_completion=False,
|
76
78
|
no_args_is_help=True,
|
77
79
|
context_settings={"help_option_names": ["-h", "--help"]},
|
80
|
+
rich_markup_mode="rich",
|
78
81
|
)
|
79
82
|
|
80
83
|
|
@@ -206,15 +209,6 @@ def translate(
|
|
206
209
|
"path to a directory containing those template files.",
|
207
210
|
),
|
208
211
|
] = "simple",
|
209
|
-
parser_type: Annotated[
|
210
|
-
str,
|
211
|
-
typer.Option(
|
212
|
-
"--parser",
|
213
|
-
"-P",
|
214
|
-
click_type=click.Choice(sorted(PARSER_TYPES)),
|
215
|
-
help="The type of parser to use.",
|
216
|
-
),
|
217
|
-
] = "code",
|
218
212
|
collection: Annotated[
|
219
213
|
str,
|
220
214
|
typer.Option(
|
@@ -224,15 +218,15 @@ def translate(
|
|
224
218
|
"collection with the name provided.",
|
225
219
|
),
|
226
220
|
] = None,
|
227
|
-
|
228
|
-
|
221
|
+
splitter_type: Annotated[
|
222
|
+
str,
|
229
223
|
typer.Option(
|
230
|
-
"-
|
231
|
-
"--
|
224
|
+
"-S",
|
225
|
+
"--splitter",
|
232
226
|
help="Name of custom splitter to use",
|
233
227
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
234
228
|
),
|
235
|
-
] =
|
229
|
+
] = "file",
|
236
230
|
max_tokens: Annotated[
|
237
231
|
int,
|
238
232
|
typer.Option(
|
@@ -264,10 +258,9 @@ def translate(
|
|
264
258
|
max_prompts=max_prompts,
|
265
259
|
max_tokens=max_tokens,
|
266
260
|
prompt_template=prompt_template,
|
267
|
-
parser_type=parser_type,
|
268
261
|
db_path=db_loc,
|
269
262
|
db_config=collections_config,
|
270
|
-
|
263
|
+
splitter_type=splitter_type,
|
271
264
|
)
|
272
265
|
translator.translate(input_dir, output_dir, overwrite, collection)
|
273
266
|
|
@@ -363,15 +356,15 @@ def document(
|
|
363
356
|
"collection with the name provided.",
|
364
357
|
),
|
365
358
|
] = None,
|
366
|
-
|
367
|
-
|
359
|
+
splitter_type: Annotated[
|
360
|
+
str,
|
368
361
|
typer.Option(
|
369
|
-
"-
|
370
|
-
"--
|
362
|
+
"-S",
|
363
|
+
"--splitter",
|
371
364
|
help="Name of custom splitter to use",
|
372
365
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
373
366
|
),
|
374
|
-
] =
|
367
|
+
] = "file",
|
375
368
|
max_tokens: Annotated[
|
376
369
|
int,
|
377
370
|
typer.Option(
|
@@ -392,7 +385,7 @@ def document(
|
|
392
385
|
max_tokens=max_tokens,
|
393
386
|
db_path=db_loc,
|
394
387
|
db_config=collections_config,
|
395
|
-
|
388
|
+
splitter_type=splitter_type,
|
396
389
|
)
|
397
390
|
if doc_mode == "madlibs":
|
398
391
|
documenter = MadLibsDocumenter(
|
@@ -488,15 +481,15 @@ def diagram(
|
|
488
481
|
help="Whether to use documentation in generation",
|
489
482
|
),
|
490
483
|
] = False,
|
491
|
-
|
492
|
-
|
484
|
+
splitter_type: Annotated[
|
485
|
+
str,
|
493
486
|
typer.Option(
|
494
|
-
"-
|
495
|
-
"--
|
487
|
+
"-S",
|
488
|
+
"--splitter",
|
496
489
|
help="Name of custom splitter to use",
|
497
490
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
498
491
|
),
|
499
|
-
] =
|
492
|
+
] = "file",
|
500
493
|
):
|
501
494
|
model_arguments = dict(temperature=temperature)
|
502
495
|
collections_config = get_collections_config()
|
@@ -509,7 +502,7 @@ def diagram(
|
|
509
502
|
db_config=collections_config,
|
510
503
|
diagram_type=diagram_type,
|
511
504
|
add_documentation=add_documentation,
|
512
|
-
|
505
|
+
splitter_type=splitter_type,
|
513
506
|
)
|
514
507
|
diagram_generator.translate(input_dir, output_dir, overwrite, collection)
|
515
508
|
|
File without changes
|
@@ -7,8 +7,11 @@ from langchain.schema import Document
|
|
7
7
|
from langchain.schema.embeddings import Embeddings
|
8
8
|
from langchain.schema.vectorstore import VST, VectorStore
|
9
9
|
|
10
|
-
from
|
11
|
-
|
10
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
11
|
+
|
12
|
+
from ..diagram import DiagramGenerator
|
13
|
+
from ..requirements import RequirementsDocumenter
|
14
|
+
from ..translate import Translator
|
12
15
|
|
13
16
|
# from langchain.vectorstores import Chroma
|
14
17
|
|
@@ -84,14 +87,10 @@ class TestTranslator(unittest.TestCase):
|
|
84
87
|
self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
|
85
88
|
self.TEST_FILE_EMBEDDING_COUNT = 14
|
86
89
|
|
87
|
-
self.req_translator =
|
90
|
+
self.req_translator = RequirementsDocumenter(
|
88
91
|
model="gpt-3.5-turbo-0125",
|
89
|
-
# embeddings_override=MockEmbeddingsFactory(),
|
90
92
|
source_language="fortran",
|
91
|
-
target_language="text",
|
92
|
-
target_version="3.10",
|
93
93
|
prompt_template="requirements",
|
94
|
-
parser_type="text",
|
95
94
|
)
|
96
95
|
|
97
96
|
@pytest.mark.translate
|
@@ -273,11 +272,6 @@ class TestTranslator(unittest.TestCase):
|
|
273
272
|
# print_query_results(QUERY_STRING, n_results)
|
274
273
|
# self.assertTrue(len(n_results) == 1, "Was splitting changed?")
|
275
274
|
|
276
|
-
def test_output_as_requirements(self):
|
277
|
-
"""Is output type requirements?"""
|
278
|
-
self.assertFalse(self.translator.outputting_requirements())
|
279
|
-
self.assertTrue(self.req_translator.outputting_requirements())
|
280
|
-
|
281
275
|
# @pytest.mark.slow
|
282
276
|
# def test_document_embeddings_added_by_translate(self):
|
283
277
|
# vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
|
@@ -310,7 +304,6 @@ class TestTranslator(unittest.TestCase):
|
|
310
304
|
self.assertRaises(
|
311
305
|
ValueError, self.translator.set_target_language, "gobbledy", "goobledy"
|
312
306
|
)
|
313
|
-
self.assertRaises(ValueError, self.translator.set_parser_type, "blah")
|
314
307
|
self.assertRaises(
|
315
308
|
ValueError, self.translator.set_source_language, "scribbledy-doop"
|
316
309
|
)
|
@@ -360,13 +353,12 @@ class TestDiagramGenerator(unittest.TestCase):
|
|
360
353
|
|
361
354
|
|
362
355
|
@pytest.mark.parametrize(
|
363
|
-
"source_language,prompt_template,expected_target_language,expected_target_version,"
|
364
|
-
"parser_type",
|
356
|
+
"source_language,prompt_template,expected_target_language,expected_target_version,",
|
365
357
|
[
|
366
|
-
("python", "document_inline", "python", "3.10"
|
367
|
-
("fortran", "document", "text", None
|
368
|
-
("mumps", "requirements", "text", None
|
369
|
-
("python", "simple", "javascript", "es6"
|
358
|
+
("python", "document_inline", "python", "3.10"),
|
359
|
+
("fortran", "document", "text", None),
|
360
|
+
("mumps", "requirements", "text", None),
|
361
|
+
("python", "simple", "javascript", "es6"),
|
370
362
|
],
|
371
363
|
)
|
372
364
|
def test_language_combinations(
|
@@ -374,7 +366,6 @@ def test_language_combinations(
|
|
374
366
|
prompt_template: str,
|
375
367
|
expected_target_language: str,
|
376
368
|
expected_target_version: str,
|
377
|
-
parser_type: str,
|
378
369
|
):
|
379
370
|
"""Tests that translator target language settings are consistent
|
380
371
|
with prompt template expectations.
|
@@ -383,12 +374,10 @@ def test_language_combinations(
|
|
383
374
|
translator.set_model("gpt-3.5-turbo-0125")
|
384
375
|
translator.set_source_language(source_language)
|
385
376
|
translator.set_target_language(expected_target_language, expected_target_version)
|
386
|
-
translator.set_parser_type(parser_type)
|
387
377
|
translator.set_prompt(prompt_template)
|
388
378
|
translator._load_parameters()
|
389
379
|
assert translator._target_language == expected_target_language # nosec
|
390
380
|
assert translator._target_version == expected_target_version # nosec
|
391
|
-
assert translator._parser_type == parser_type # nosec
|
392
381
|
assert translator._splitter.language == source_language # nosec
|
393
382
|
assert translator._splitter.model.model_name == "gpt-3.5-turbo-0125" # nosec
|
394
383
|
assert translator._prompt_template_name == prompt_template # nosec
|