janus-llm 2.1.0__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +2 -2
- janus/__main__.py +1 -1
- janus/_tests/test_cli.py +1 -2
- janus/cli.py +43 -50
- janus/converter/__init__.py +6 -0
- janus/converter/_tests/__init__.py +0 -0
- janus/{_tests → converter/_tests}/test_translate.py +11 -22
- janus/converter/converter.py +614 -0
- janus/converter/diagram.py +124 -0
- janus/converter/document.py +131 -0
- janus/converter/evaluate.py +15 -0
- janus/converter/requirements.py +51 -0
- janus/converter/translate.py +108 -0
- janus/language/block.py +1 -1
- janus/language/combine.py +0 -1
- janus/language/treesitter/treesitter.py +20 -1
- janus/llm/model_callbacks.py +33 -36
- janus/llm/models_info.py +14 -0
- janus/metrics/reading.py +27 -5
- janus/prompts/prompt.py +37 -11
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.1.dist-info}/METADATA +1 -1
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.1.dist-info}/RECORD +25 -19
- janus/converter.py +0 -161
- janus/translate.py +0 -987
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.1.dist-info}/LICENSE +0 -0
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.1.dist-info}/WHEEL +0 -0
- {janus_llm-2.1.0.dist-info → janus_llm-3.0.1.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -2,10 +2,10 @@ import warnings
|
|
2
2
|
|
3
3
|
from langchain_core._api.deprecation import LangChainDeprecationWarning
|
4
4
|
|
5
|
+
from .converter.translate import Translator
|
5
6
|
from .metrics import * # noqa: F403
|
6
|
-
from .translate import Translator
|
7
7
|
|
8
|
-
__version__ = "
|
8
|
+
__version__ = "3.0.1"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/__main__.py
CHANGED
janus/_tests/test_cli.py
CHANGED
@@ -104,7 +104,7 @@ class TestCli(unittest.TestCase):
|
|
104
104
|
result = self.runner.invoke(app, ["db", "rm", "test-db-name", "-y"])
|
105
105
|
self.assertEqual(result.exit_code, 0)
|
106
106
|
|
107
|
-
@patch("janus.translate.Translator.translate", autospec=True)
|
107
|
+
@patch("janus.converter.translate.Translator.translate", autospec=True)
|
108
108
|
def test_translate(self, mock_translate):
|
109
109
|
# Arrange
|
110
110
|
mock_instance = mock_translate.return_value
|
@@ -119,7 +119,6 @@ class TestCli(unittest.TestCase):
|
|
119
119
|
overwrite=True,
|
120
120
|
temp=0.7,
|
121
121
|
prompt_template="simple",
|
122
|
-
parser_type="code",
|
123
122
|
collection=None,
|
124
123
|
)
|
125
124
|
|
janus/cli.py
CHANGED
@@ -12,33 +12,32 @@ from rich.console import Console
|
|
12
12
|
from rich.prompt import Confirm
|
13
13
|
from typing_extensions import Annotated
|
14
14
|
|
15
|
-
from .
|
16
|
-
from .
|
17
|
-
from .
|
15
|
+
from janus.converter.diagram import DiagramGenerator
|
16
|
+
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
17
|
+
from janus.converter.requirements import RequirementsDocumenter
|
18
|
+
from janus.converter.translate import Translator
|
19
|
+
from janus.embedding.collections import Collections
|
20
|
+
from janus.embedding.database import ChromaEmbeddingDatabase
|
21
|
+
from janus.embedding.embedding_models_info import (
|
18
22
|
EMBEDDING_COST_PER_MODEL,
|
19
23
|
EMBEDDING_MODEL_CONFIG_DIR,
|
20
24
|
EMBEDDING_TOKEN_LIMITS,
|
21
25
|
EmbeddingModelType,
|
22
26
|
)
|
23
|
-
from .embedding.vectorize import ChromaDBVectorizer
|
24
|
-
from .language.binary import BinarySplitter
|
25
|
-
from .language.mumps import MumpsSplitter
|
26
|
-
from .language.naive.registry import CUSTOM_SPLITTERS
|
27
|
-
from .language.treesitter import TreeSitterSplitter
|
28
|
-
from .llm.model_callbacks import COST_PER_1K_TOKENS
|
29
|
-
from .llm.models_info import
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
DiagramGenerator,
|
34
|
-
Documenter,
|
35
|
-
MadLibsDocumenter,
|
36
|
-
MultiDocumenter,
|
37
|
-
RequirementsDocumenter,
|
38
|
-
Translator,
|
27
|
+
from janus.embedding.vectorize import ChromaDBVectorizer
|
28
|
+
from janus.language.binary import BinarySplitter
|
29
|
+
from janus.language.mumps import MumpsSplitter
|
30
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
31
|
+
from janus.language.treesitter import TreeSitterSplitter
|
32
|
+
from janus.llm.model_callbacks import COST_PER_1K_TOKENS
|
33
|
+
from janus.llm.models_info import (
|
34
|
+
MODEL_CONFIG_DIR,
|
35
|
+
MODEL_TYPE_CONSTRUCTORS,
|
36
|
+
TOKEN_LIMITS,
|
39
37
|
)
|
40
|
-
from .
|
41
|
-
from .utils.
|
38
|
+
from janus.metrics.cli import evaluate
|
39
|
+
from janus.utils.enums import LANGUAGES
|
40
|
+
from janus.utils.logger import create_logger
|
42
41
|
|
43
42
|
httpx_logger = logging.getLogger("httpx")
|
44
43
|
httpx_logger.setLevel(logging.WARNING)
|
@@ -71,10 +70,14 @@ def get_collections_config():
|
|
71
70
|
|
72
71
|
|
73
72
|
app = typer.Typer(
|
74
|
-
help=
|
73
|
+
help=(
|
74
|
+
"[bold][dark_orange]Janus[/dark_orange] is a CLI for translating, "
|
75
|
+
"documenting, and diagramming code using large language models.[/bold]"
|
76
|
+
),
|
75
77
|
add_completion=False,
|
76
78
|
no_args_is_help=True,
|
77
79
|
context_settings={"help_option_names": ["-h", "--help"]},
|
80
|
+
rich_markup_mode="rich",
|
78
81
|
)
|
79
82
|
|
80
83
|
|
@@ -206,15 +209,6 @@ def translate(
|
|
206
209
|
"path to a directory containing those template files.",
|
207
210
|
),
|
208
211
|
] = "simple",
|
209
|
-
parser_type: Annotated[
|
210
|
-
str,
|
211
|
-
typer.Option(
|
212
|
-
"--parser",
|
213
|
-
"-P",
|
214
|
-
click_type=click.Choice(sorted(PARSER_TYPES)),
|
215
|
-
help="The type of parser to use.",
|
216
|
-
),
|
217
|
-
] = "code",
|
218
212
|
collection: Annotated[
|
219
213
|
str,
|
220
214
|
typer.Option(
|
@@ -224,15 +218,15 @@ def translate(
|
|
224
218
|
"collection with the name provided.",
|
225
219
|
),
|
226
220
|
] = None,
|
227
|
-
|
228
|
-
|
221
|
+
splitter_type: Annotated[
|
222
|
+
str,
|
229
223
|
typer.Option(
|
230
|
-
"-
|
231
|
-
"--
|
224
|
+
"-S",
|
225
|
+
"--splitter",
|
232
226
|
help="Name of custom splitter to use",
|
233
227
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
234
228
|
),
|
235
|
-
] =
|
229
|
+
] = "file",
|
236
230
|
max_tokens: Annotated[
|
237
231
|
int,
|
238
232
|
typer.Option(
|
@@ -264,10 +258,9 @@ def translate(
|
|
264
258
|
max_prompts=max_prompts,
|
265
259
|
max_tokens=max_tokens,
|
266
260
|
prompt_template=prompt_template,
|
267
|
-
parser_type=parser_type,
|
268
261
|
db_path=db_loc,
|
269
262
|
db_config=collections_config,
|
270
|
-
|
263
|
+
splitter_type=splitter_type,
|
271
264
|
)
|
272
265
|
translator.translate(input_dir, output_dir, overwrite, collection)
|
273
266
|
|
@@ -363,15 +356,15 @@ def document(
|
|
363
356
|
"collection with the name provided.",
|
364
357
|
),
|
365
358
|
] = None,
|
366
|
-
|
367
|
-
|
359
|
+
splitter_type: Annotated[
|
360
|
+
str,
|
368
361
|
typer.Option(
|
369
|
-
"-
|
370
|
-
"--
|
362
|
+
"-S",
|
363
|
+
"--splitter",
|
371
364
|
help="Name of custom splitter to use",
|
372
365
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
373
366
|
),
|
374
|
-
] =
|
367
|
+
] = "file",
|
375
368
|
max_tokens: Annotated[
|
376
369
|
int,
|
377
370
|
typer.Option(
|
@@ -392,7 +385,7 @@ def document(
|
|
392
385
|
max_tokens=max_tokens,
|
393
386
|
db_path=db_loc,
|
394
387
|
db_config=collections_config,
|
395
|
-
|
388
|
+
splitter_type=splitter_type,
|
396
389
|
)
|
397
390
|
if doc_mode == "madlibs":
|
398
391
|
documenter = MadLibsDocumenter(
|
@@ -488,15 +481,15 @@ def diagram(
|
|
488
481
|
help="Whether to use documentation in generation",
|
489
482
|
),
|
490
483
|
] = False,
|
491
|
-
|
492
|
-
|
484
|
+
splitter_type: Annotated[
|
485
|
+
str,
|
493
486
|
typer.Option(
|
494
|
-
"-
|
495
|
-
"--
|
487
|
+
"-S",
|
488
|
+
"--splitter",
|
496
489
|
help="Name of custom splitter to use",
|
497
490
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
498
491
|
),
|
499
|
-
] =
|
492
|
+
] = "file",
|
500
493
|
):
|
501
494
|
model_arguments = dict(temperature=temperature)
|
502
495
|
collections_config = get_collections_config()
|
@@ -509,7 +502,7 @@ def diagram(
|
|
509
502
|
db_config=collections_config,
|
510
503
|
diagram_type=diagram_type,
|
511
504
|
add_documentation=add_documentation,
|
512
|
-
|
505
|
+
splitter_type=splitter_type,
|
513
506
|
)
|
514
507
|
diagram_generator.translate(input_dir, output_dir, overwrite, collection)
|
515
508
|
|
File without changes
|
@@ -7,8 +7,11 @@ from langchain.schema import Document
|
|
7
7
|
from langchain.schema.embeddings import Embeddings
|
8
8
|
from langchain.schema.vectorstore import VST, VectorStore
|
9
9
|
|
10
|
-
from
|
11
|
-
|
10
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
11
|
+
|
12
|
+
from ..diagram import DiagramGenerator
|
13
|
+
from ..requirements import RequirementsDocumenter
|
14
|
+
from ..translate import Translator
|
12
15
|
|
13
16
|
# from langchain.vectorstores import Chroma
|
14
17
|
|
@@ -84,14 +87,10 @@ class TestTranslator(unittest.TestCase):
|
|
84
87
|
self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
|
85
88
|
self.TEST_FILE_EMBEDDING_COUNT = 14
|
86
89
|
|
87
|
-
self.req_translator =
|
90
|
+
self.req_translator = RequirementsDocumenter(
|
88
91
|
model="gpt-3.5-turbo-0125",
|
89
|
-
# embeddings_override=MockEmbeddingsFactory(),
|
90
92
|
source_language="fortran",
|
91
|
-
target_language="text",
|
92
|
-
target_version="3.10",
|
93
93
|
prompt_template="requirements",
|
94
|
-
parser_type="text",
|
95
94
|
)
|
96
95
|
|
97
96
|
@pytest.mark.translate
|
@@ -273,11 +272,6 @@ class TestTranslator(unittest.TestCase):
|
|
273
272
|
# print_query_results(QUERY_STRING, n_results)
|
274
273
|
# self.assertTrue(len(n_results) == 1, "Was splitting changed?")
|
275
274
|
|
276
|
-
def test_output_as_requirements(self):
|
277
|
-
"""Is output type requirements?"""
|
278
|
-
self.assertFalse(self.translator.outputting_requirements())
|
279
|
-
self.assertTrue(self.req_translator.outputting_requirements())
|
280
|
-
|
281
275
|
# @pytest.mark.slow
|
282
276
|
# def test_document_embeddings_added_by_translate(self):
|
283
277
|
# vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
|
@@ -310,7 +304,6 @@ class TestTranslator(unittest.TestCase):
|
|
310
304
|
self.assertRaises(
|
311
305
|
ValueError, self.translator.set_target_language, "gobbledy", "goobledy"
|
312
306
|
)
|
313
|
-
self.assertRaises(ValueError, self.translator.set_parser_type, "blah")
|
314
307
|
self.assertRaises(
|
315
308
|
ValueError, self.translator.set_source_language, "scribbledy-doop"
|
316
309
|
)
|
@@ -360,13 +353,12 @@ class TestDiagramGenerator(unittest.TestCase):
|
|
360
353
|
|
361
354
|
|
362
355
|
@pytest.mark.parametrize(
|
363
|
-
"source_language,prompt_template,expected_target_language,expected_target_version,"
|
364
|
-
"parser_type",
|
356
|
+
"source_language,prompt_template,expected_target_language,expected_target_version,",
|
365
357
|
[
|
366
|
-
("python", "document_inline", "python", "3.10"
|
367
|
-
("fortran", "document", "text", None
|
368
|
-
("mumps", "requirements", "text", None
|
369
|
-
("python", "simple", "javascript", "es6"
|
358
|
+
("python", "document_inline", "python", "3.10"),
|
359
|
+
("fortran", "document", "text", None),
|
360
|
+
("mumps", "requirements", "text", None),
|
361
|
+
("python", "simple", "javascript", "es6"),
|
370
362
|
],
|
371
363
|
)
|
372
364
|
def test_language_combinations(
|
@@ -374,7 +366,6 @@ def test_language_combinations(
|
|
374
366
|
prompt_template: str,
|
375
367
|
expected_target_language: str,
|
376
368
|
expected_target_version: str,
|
377
|
-
parser_type: str,
|
378
369
|
):
|
379
370
|
"""Tests that translator target language settings are consistent
|
380
371
|
with prompt template expectations.
|
@@ -383,12 +374,10 @@ def test_language_combinations(
|
|
383
374
|
translator.set_model("gpt-3.5-turbo-0125")
|
384
375
|
translator.set_source_language(source_language)
|
385
376
|
translator.set_target_language(expected_target_language, expected_target_version)
|
386
|
-
translator.set_parser_type(parser_type)
|
387
377
|
translator.set_prompt(prompt_template)
|
388
378
|
translator._load_parameters()
|
389
379
|
assert translator._target_language == expected_target_language # nosec
|
390
380
|
assert translator._target_version == expected_target_version # nosec
|
391
|
-
assert translator._parser_type == parser_type # nosec
|
392
381
|
assert translator._splitter.language == source_language # nosec
|
393
382
|
assert translator._splitter.model.model_name == "gpt-3.5-turbo-0125" # nosec
|
394
383
|
assert translator._prompt_template_name == prompt_template # nosec
|