janus-llm 2.0.2__py3-none-any.whl → 3.0.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +2 -2
- janus/__main__.py +1 -1
- janus/_tests/test_cli.py +1 -2
- janus/cli.py +43 -51
- janus/converter/__init__.py +6 -0
- janus/converter/_tests/__init__.py +0 -0
- janus/{_tests → converter/_tests}/test_translate.py +11 -22
- janus/converter/converter.py +614 -0
- janus/converter/diagram.py +124 -0
- janus/converter/document.py +131 -0
- janus/converter/evaluate.py +15 -0
- janus/converter/requirements.py +50 -0
- janus/converter/translate.py +108 -0
- janus/embedding/_tests/test_collections.py +2 -2
- janus/language/_tests/test_splitter.py +1 -1
- janus/language/alc/__init__.py +1 -0
- janus/language/alc/_tests/__init__.py +0 -0
- janus/language/alc/_tests/test_alc.py +28 -0
- janus/language/alc/alc.py +87 -0
- janus/language/block.py +4 -2
- janus/language/combine.py +0 -1
- janus/language/mumps/mumps.py +2 -3
- janus/language/naive/__init__.py +1 -1
- janus/language/naive/basic_splitter.py +4 -4
- janus/language/naive/chunk_splitter.py +4 -4
- janus/language/naive/registry.py +1 -1
- janus/language/naive/simple_ast.py +23 -12
- janus/language/naive/tag_splitter.py +4 -4
- janus/language/splitter.py +10 -4
- janus/language/treesitter/treesitter.py +26 -8
- janus/llm/model_callbacks.py +34 -37
- janus/llm/models_info.py +16 -3
- janus/metrics/_tests/test_llm.py +2 -3
- janus/metrics/_tests/test_rouge_score.py +1 -1
- janus/metrics/_tests/test_similarity_score.py +1 -1
- janus/metrics/complexity_metrics.py +3 -4
- janus/metrics/metric.py +3 -4
- janus/metrics/reading.py +27 -5
- janus/prompts/prompt.py +67 -7
- janus/utils/enums.py +6 -5
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/METADATA +1 -1
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/RECORD +45 -35
- janus/converter.py +0 -158
- janus/translate.py +0 -981
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/LICENSE +0 -0
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/WHEEL +0 -0
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -2,10 +2,10 @@ import warnings
|
|
2
2
|
|
3
3
|
from langchain_core._api.deprecation import LangChainDeprecationWarning
|
4
4
|
|
5
|
+
from .converter.translate import Translator
|
5
6
|
from .metrics import * # noqa: F403
|
6
|
-
from .translate import Translator
|
7
7
|
|
8
|
-
__version__ = "
|
8
|
+
__version__ = "3.0.0"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/__main__.py
CHANGED
janus/_tests/test_cli.py
CHANGED
@@ -104,7 +104,7 @@ class TestCli(unittest.TestCase):
|
|
104
104
|
result = self.runner.invoke(app, ["db", "rm", "test-db-name", "-y"])
|
105
105
|
self.assertEqual(result.exit_code, 0)
|
106
106
|
|
107
|
-
@patch("janus.translate.Translator.translate", autospec=True)
|
107
|
+
@patch("janus.converter.translate.Translator.translate", autospec=True)
|
108
108
|
def test_translate(self, mock_translate):
|
109
109
|
# Arrange
|
110
110
|
mock_instance = mock_translate.return_value
|
@@ -119,7 +119,6 @@ class TestCli(unittest.TestCase):
|
|
119
119
|
overwrite=True,
|
120
120
|
temp=0.7,
|
121
121
|
prompt_template="simple",
|
122
|
-
parser_type="code",
|
123
122
|
collection=None,
|
124
123
|
)
|
125
124
|
|
janus/cli.py
CHANGED
@@ -12,34 +12,32 @@ from rich.console import Console
|
|
12
12
|
from rich.prompt import Confirm
|
13
13
|
from typing_extensions import Annotated
|
14
14
|
|
15
|
-
from janus.
|
16
|
-
|
17
|
-
from .
|
18
|
-
from .
|
19
|
-
from .embedding.
|
15
|
+
from janus.converter.diagram import DiagramGenerator
|
16
|
+
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
17
|
+
from janus.converter.requirements import RequirementsDocumenter
|
18
|
+
from janus.converter.translate import Translator
|
19
|
+
from janus.embedding.collections import Collections
|
20
|
+
from janus.embedding.database import ChromaEmbeddingDatabase
|
21
|
+
from janus.embedding.embedding_models_info import (
|
20
22
|
EMBEDDING_COST_PER_MODEL,
|
21
23
|
EMBEDDING_MODEL_CONFIG_DIR,
|
22
24
|
EMBEDDING_TOKEN_LIMITS,
|
23
25
|
EmbeddingModelType,
|
24
26
|
)
|
25
|
-
from .embedding.vectorize import ChromaDBVectorizer
|
26
|
-
from .language.binary import BinarySplitter
|
27
|
-
from .language.mumps import MumpsSplitter
|
28
|
-
from .language.
|
29
|
-
from .
|
30
|
-
from .llm.
|
31
|
-
from .
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
Documenter,
|
36
|
-
MadLibsDocumenter,
|
37
|
-
MultiDocumenter,
|
38
|
-
RequirementsDocumenter,
|
39
|
-
Translator,
|
27
|
+
from janus.embedding.vectorize import ChromaDBVectorizer
|
28
|
+
from janus.language.binary import BinarySplitter
|
29
|
+
from janus.language.mumps import MumpsSplitter
|
30
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
31
|
+
from janus.language.treesitter import TreeSitterSplitter
|
32
|
+
from janus.llm.model_callbacks import COST_PER_1K_TOKENS
|
33
|
+
from janus.llm.models_info import (
|
34
|
+
MODEL_CONFIG_DIR,
|
35
|
+
MODEL_TYPE_CONSTRUCTORS,
|
36
|
+
TOKEN_LIMITS,
|
40
37
|
)
|
41
|
-
from .
|
42
|
-
from .utils.
|
38
|
+
from janus.metrics.cli import evaluate
|
39
|
+
from janus.utils.enums import LANGUAGES
|
40
|
+
from janus.utils.logger import create_logger
|
43
41
|
|
44
42
|
httpx_logger = logging.getLogger("httpx")
|
45
43
|
httpx_logger.setLevel(logging.WARNING)
|
@@ -72,10 +70,14 @@ def get_collections_config():
|
|
72
70
|
|
73
71
|
|
74
72
|
app = typer.Typer(
|
75
|
-
help=
|
73
|
+
help=(
|
74
|
+
"[bold][dark_orange]Janus[/dark_orange] is a CLI for translating, "
|
75
|
+
"documenting, and diagramming code using large language models.[/bold]"
|
76
|
+
),
|
76
77
|
add_completion=False,
|
77
78
|
no_args_is_help=True,
|
78
79
|
context_settings={"help_option_names": ["-h", "--help"]},
|
80
|
+
rich_markup_mode="rich",
|
79
81
|
)
|
80
82
|
|
81
83
|
|
@@ -207,15 +209,6 @@ def translate(
|
|
207
209
|
"path to a directory containing those template files.",
|
208
210
|
),
|
209
211
|
] = "simple",
|
210
|
-
parser_type: Annotated[
|
211
|
-
str,
|
212
|
-
typer.Option(
|
213
|
-
"--parser",
|
214
|
-
"-P",
|
215
|
-
click_type=click.Choice(sorted(PARSER_TYPES)),
|
216
|
-
help="The type of parser to use.",
|
217
|
-
),
|
218
|
-
] = "code",
|
219
212
|
collection: Annotated[
|
220
213
|
str,
|
221
214
|
typer.Option(
|
@@ -225,15 +218,15 @@ def translate(
|
|
225
218
|
"collection with the name provided.",
|
226
219
|
),
|
227
220
|
] = None,
|
228
|
-
|
229
|
-
|
221
|
+
splitter_type: Annotated[
|
222
|
+
str,
|
230
223
|
typer.Option(
|
231
|
-
"-
|
232
|
-
"--
|
224
|
+
"-S",
|
225
|
+
"--splitter",
|
233
226
|
help="Name of custom splitter to use",
|
234
227
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
235
228
|
),
|
236
|
-
] =
|
229
|
+
] = "file",
|
237
230
|
max_tokens: Annotated[
|
238
231
|
int,
|
239
232
|
typer.Option(
|
@@ -265,10 +258,9 @@ def translate(
|
|
265
258
|
max_prompts=max_prompts,
|
266
259
|
max_tokens=max_tokens,
|
267
260
|
prompt_template=prompt_template,
|
268
|
-
parser_type=parser_type,
|
269
261
|
db_path=db_loc,
|
270
262
|
db_config=collections_config,
|
271
|
-
|
263
|
+
splitter_type=splitter_type,
|
272
264
|
)
|
273
265
|
translator.translate(input_dir, output_dir, overwrite, collection)
|
274
266
|
|
@@ -364,15 +356,15 @@ def document(
|
|
364
356
|
"collection with the name provided.",
|
365
357
|
),
|
366
358
|
] = None,
|
367
|
-
|
368
|
-
|
359
|
+
splitter_type: Annotated[
|
360
|
+
str,
|
369
361
|
typer.Option(
|
370
|
-
"-
|
371
|
-
"--
|
362
|
+
"-S",
|
363
|
+
"--splitter",
|
372
364
|
help="Name of custom splitter to use",
|
373
365
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
374
366
|
),
|
375
|
-
] =
|
367
|
+
] = "file",
|
376
368
|
max_tokens: Annotated[
|
377
369
|
int,
|
378
370
|
typer.Option(
|
@@ -393,7 +385,7 @@ def document(
|
|
393
385
|
max_tokens=max_tokens,
|
394
386
|
db_path=db_loc,
|
395
387
|
db_config=collections_config,
|
396
|
-
|
388
|
+
splitter_type=splitter_type,
|
397
389
|
)
|
398
390
|
if doc_mode == "madlibs":
|
399
391
|
documenter = MadLibsDocumenter(
|
@@ -489,15 +481,15 @@ def diagram(
|
|
489
481
|
help="Whether to use documentation in generation",
|
490
482
|
),
|
491
483
|
] = False,
|
492
|
-
|
493
|
-
|
484
|
+
splitter_type: Annotated[
|
485
|
+
str,
|
494
486
|
typer.Option(
|
495
|
-
"-
|
496
|
-
"--
|
487
|
+
"-S",
|
488
|
+
"--splitter",
|
497
489
|
help="Name of custom splitter to use",
|
498
490
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
499
491
|
),
|
500
|
-
] =
|
492
|
+
] = "file",
|
501
493
|
):
|
502
494
|
model_arguments = dict(temperature=temperature)
|
503
495
|
collections_config = get_collections_config()
|
@@ -510,7 +502,7 @@ def diagram(
|
|
510
502
|
db_config=collections_config,
|
511
503
|
diagram_type=diagram_type,
|
512
504
|
add_documentation=add_documentation,
|
513
|
-
|
505
|
+
splitter_type=splitter_type,
|
514
506
|
)
|
515
507
|
diagram_generator.translate(input_dir, output_dir, overwrite, collection)
|
516
508
|
|
File without changes
|
@@ -7,8 +7,11 @@ from langchain.schema import Document
|
|
7
7
|
from langchain.schema.embeddings import Embeddings
|
8
8
|
from langchain.schema.vectorstore import VST, VectorStore
|
9
9
|
|
10
|
-
from
|
11
|
-
|
10
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
11
|
+
|
12
|
+
from ..diagram import DiagramGenerator
|
13
|
+
from ..requirements import RequirementsDocumenter
|
14
|
+
from ..translate import Translator
|
12
15
|
|
13
16
|
# from langchain.vectorstores import Chroma
|
14
17
|
|
@@ -84,14 +87,10 @@ class TestTranslator(unittest.TestCase):
|
|
84
87
|
self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
|
85
88
|
self.TEST_FILE_EMBEDDING_COUNT = 14
|
86
89
|
|
87
|
-
self.req_translator =
|
90
|
+
self.req_translator = RequirementsDocumenter(
|
88
91
|
model="gpt-3.5-turbo-0125",
|
89
|
-
# embeddings_override=MockEmbeddingsFactory(),
|
90
92
|
source_language="fortran",
|
91
|
-
target_language="text",
|
92
|
-
target_version="3.10",
|
93
93
|
prompt_template="requirements",
|
94
|
-
parser_type="text",
|
95
94
|
)
|
96
95
|
|
97
96
|
@pytest.mark.translate
|
@@ -273,11 +272,6 @@ class TestTranslator(unittest.TestCase):
|
|
273
272
|
# print_query_results(QUERY_STRING, n_results)
|
274
273
|
# self.assertTrue(len(n_results) == 1, "Was splitting changed?")
|
275
274
|
|
276
|
-
def test_output_as_requirements(self):
|
277
|
-
"""Is output type requirements?"""
|
278
|
-
self.assertFalse(self.translator.outputting_requirements())
|
279
|
-
self.assertTrue(self.req_translator.outputting_requirements())
|
280
|
-
|
281
275
|
# @pytest.mark.slow
|
282
276
|
# def test_document_embeddings_added_by_translate(self):
|
283
277
|
# vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
|
@@ -310,7 +304,6 @@ class TestTranslator(unittest.TestCase):
|
|
310
304
|
self.assertRaises(
|
311
305
|
ValueError, self.translator.set_target_language, "gobbledy", "goobledy"
|
312
306
|
)
|
313
|
-
self.assertRaises(ValueError, self.translator.set_parser_type, "blah")
|
314
307
|
self.assertRaises(
|
315
308
|
ValueError, self.translator.set_source_language, "scribbledy-doop"
|
316
309
|
)
|
@@ -360,13 +353,12 @@ class TestDiagramGenerator(unittest.TestCase):
|
|
360
353
|
|
361
354
|
|
362
355
|
@pytest.mark.parametrize(
|
363
|
-
"source_language,prompt_template,expected_target_language,expected_target_version,"
|
364
|
-
"parser_type",
|
356
|
+
"source_language,prompt_template,expected_target_language,expected_target_version,",
|
365
357
|
[
|
366
|
-
("python", "document_inline", "python", "3.10"
|
367
|
-
("fortran", "document", "text", None
|
368
|
-
("mumps", "requirements", "text", None
|
369
|
-
("python", "simple", "javascript", "es6"
|
358
|
+
("python", "document_inline", "python", "3.10"),
|
359
|
+
("fortran", "document", "text", None),
|
360
|
+
("mumps", "requirements", "text", None),
|
361
|
+
("python", "simple", "javascript", "es6"),
|
370
362
|
],
|
371
363
|
)
|
372
364
|
def test_language_combinations(
|
@@ -374,7 +366,6 @@ def test_language_combinations(
|
|
374
366
|
prompt_template: str,
|
375
367
|
expected_target_language: str,
|
376
368
|
expected_target_version: str,
|
377
|
-
parser_type: str,
|
378
369
|
):
|
379
370
|
"""Tests that translator target language settings are consistent
|
380
371
|
with prompt template expectations.
|
@@ -383,12 +374,10 @@ def test_language_combinations(
|
|
383
374
|
translator.set_model("gpt-3.5-turbo-0125")
|
384
375
|
translator.set_source_language(source_language)
|
385
376
|
translator.set_target_language(expected_target_language, expected_target_version)
|
386
|
-
translator.set_parser_type(parser_type)
|
387
377
|
translator.set_prompt(prompt_template)
|
388
378
|
translator._load_parameters()
|
389
379
|
assert translator._target_language == expected_target_language # nosec
|
390
380
|
assert translator._target_version == expected_target_version # nosec
|
391
|
-
assert translator._parser_type == parser_type # nosec
|
392
381
|
assert translator._splitter.language == source_language # nosec
|
393
382
|
assert translator._splitter.model.model_name == "gpt-3.5-turbo-0125" # nosec
|
394
383
|
assert translator._prompt_template_name == prompt_template # nosec
|