janus-llm 2.0.2__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +2 -2
- janus/__main__.py +1 -1
- janus/_tests/test_cli.py +1 -2
- janus/cli.py +43 -51
- janus/converter/__init__.py +6 -0
- janus/converter/_tests/__init__.py +0 -0
- janus/{_tests → converter/_tests}/test_translate.py +11 -22
- janus/converter/converter.py +614 -0
- janus/converter/diagram.py +124 -0
- janus/converter/document.py +131 -0
- janus/converter/evaluate.py +15 -0
- janus/converter/requirements.py +50 -0
- janus/converter/translate.py +108 -0
- janus/embedding/_tests/test_collections.py +2 -2
- janus/language/_tests/test_splitter.py +1 -1
- janus/language/alc/__init__.py +1 -0
- janus/language/alc/_tests/__init__.py +0 -0
- janus/language/alc/_tests/test_alc.py +28 -0
- janus/language/alc/alc.py +87 -0
- janus/language/block.py +4 -2
- janus/language/combine.py +0 -1
- janus/language/mumps/mumps.py +2 -3
- janus/language/naive/__init__.py +1 -1
- janus/language/naive/basic_splitter.py +4 -4
- janus/language/naive/chunk_splitter.py +4 -4
- janus/language/naive/registry.py +1 -1
- janus/language/naive/simple_ast.py +23 -12
- janus/language/naive/tag_splitter.py +4 -4
- janus/language/splitter.py +10 -4
- janus/language/treesitter/treesitter.py +26 -8
- janus/llm/model_callbacks.py +34 -37
- janus/llm/models_info.py +16 -3
- janus/metrics/_tests/test_llm.py +2 -3
- janus/metrics/_tests/test_rouge_score.py +1 -1
- janus/metrics/_tests/test_similarity_score.py +1 -1
- janus/metrics/complexity_metrics.py +3 -4
- janus/metrics/metric.py +3 -4
- janus/metrics/reading.py +27 -5
- janus/prompts/prompt.py +67 -7
- janus/utils/enums.py +6 -5
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/METADATA +1 -1
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/RECORD +45 -35
- janus/converter.py +0 -158
- janus/translate.py +0 -981
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/LICENSE +0 -0
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/WHEEL +0 -0
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -2,10 +2,10 @@ import warnings
|
|
2
2
|
|
3
3
|
from langchain_core._api.deprecation import LangChainDeprecationWarning
|
4
4
|
|
5
|
+
from .converter.translate import Translator
|
5
6
|
from .metrics import * # noqa: F403
|
6
|
-
from .translate import Translator
|
7
7
|
|
8
|
-
__version__ = "
|
8
|
+
__version__ = "3.0.0"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/__main__.py
CHANGED
janus/_tests/test_cli.py
CHANGED
@@ -104,7 +104,7 @@ class TestCli(unittest.TestCase):
|
|
104
104
|
result = self.runner.invoke(app, ["db", "rm", "test-db-name", "-y"])
|
105
105
|
self.assertEqual(result.exit_code, 0)
|
106
106
|
|
107
|
-
@patch("janus.translate.Translator.translate", autospec=True)
|
107
|
+
@patch("janus.converter.translate.Translator.translate", autospec=True)
|
108
108
|
def test_translate(self, mock_translate):
|
109
109
|
# Arrange
|
110
110
|
mock_instance = mock_translate.return_value
|
@@ -119,7 +119,6 @@ class TestCli(unittest.TestCase):
|
|
119
119
|
overwrite=True,
|
120
120
|
temp=0.7,
|
121
121
|
prompt_template="simple",
|
122
|
-
parser_type="code",
|
123
122
|
collection=None,
|
124
123
|
)
|
125
124
|
|
janus/cli.py
CHANGED
@@ -12,34 +12,32 @@ from rich.console import Console
|
|
12
12
|
from rich.prompt import Confirm
|
13
13
|
from typing_extensions import Annotated
|
14
14
|
|
15
|
-
from janus.
|
16
|
-
|
17
|
-
from .
|
18
|
-
from .
|
19
|
-
from .embedding.
|
15
|
+
from janus.converter.diagram import DiagramGenerator
|
16
|
+
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
17
|
+
from janus.converter.requirements import RequirementsDocumenter
|
18
|
+
from janus.converter.translate import Translator
|
19
|
+
from janus.embedding.collections import Collections
|
20
|
+
from janus.embedding.database import ChromaEmbeddingDatabase
|
21
|
+
from janus.embedding.embedding_models_info import (
|
20
22
|
EMBEDDING_COST_PER_MODEL,
|
21
23
|
EMBEDDING_MODEL_CONFIG_DIR,
|
22
24
|
EMBEDDING_TOKEN_LIMITS,
|
23
25
|
EmbeddingModelType,
|
24
26
|
)
|
25
|
-
from .embedding.vectorize import ChromaDBVectorizer
|
26
|
-
from .language.binary import BinarySplitter
|
27
|
-
from .language.mumps import MumpsSplitter
|
28
|
-
from .language.
|
29
|
-
from .
|
30
|
-
from .llm.
|
31
|
-
from .
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
Documenter,
|
36
|
-
MadLibsDocumenter,
|
37
|
-
MultiDocumenter,
|
38
|
-
RequirementsDocumenter,
|
39
|
-
Translator,
|
27
|
+
from janus.embedding.vectorize import ChromaDBVectorizer
|
28
|
+
from janus.language.binary import BinarySplitter
|
29
|
+
from janus.language.mumps import MumpsSplitter
|
30
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
31
|
+
from janus.language.treesitter import TreeSitterSplitter
|
32
|
+
from janus.llm.model_callbacks import COST_PER_1K_TOKENS
|
33
|
+
from janus.llm.models_info import (
|
34
|
+
MODEL_CONFIG_DIR,
|
35
|
+
MODEL_TYPE_CONSTRUCTORS,
|
36
|
+
TOKEN_LIMITS,
|
40
37
|
)
|
41
|
-
from .
|
42
|
-
from .utils.
|
38
|
+
from janus.metrics.cli import evaluate
|
39
|
+
from janus.utils.enums import LANGUAGES
|
40
|
+
from janus.utils.logger import create_logger
|
43
41
|
|
44
42
|
httpx_logger = logging.getLogger("httpx")
|
45
43
|
httpx_logger.setLevel(logging.WARNING)
|
@@ -72,10 +70,14 @@ def get_collections_config():
|
|
72
70
|
|
73
71
|
|
74
72
|
app = typer.Typer(
|
75
|
-
help=
|
73
|
+
help=(
|
74
|
+
"[bold][dark_orange]Janus[/dark_orange] is a CLI for translating, "
|
75
|
+
"documenting, and diagramming code using large language models.[/bold]"
|
76
|
+
),
|
76
77
|
add_completion=False,
|
77
78
|
no_args_is_help=True,
|
78
79
|
context_settings={"help_option_names": ["-h", "--help"]},
|
80
|
+
rich_markup_mode="rich",
|
79
81
|
)
|
80
82
|
|
81
83
|
|
@@ -207,15 +209,6 @@ def translate(
|
|
207
209
|
"path to a directory containing those template files.",
|
208
210
|
),
|
209
211
|
] = "simple",
|
210
|
-
parser_type: Annotated[
|
211
|
-
str,
|
212
|
-
typer.Option(
|
213
|
-
"--parser",
|
214
|
-
"-P",
|
215
|
-
click_type=click.Choice(sorted(PARSER_TYPES)),
|
216
|
-
help="The type of parser to use.",
|
217
|
-
),
|
218
|
-
] = "code",
|
219
212
|
collection: Annotated[
|
220
213
|
str,
|
221
214
|
typer.Option(
|
@@ -225,15 +218,15 @@ def translate(
|
|
225
218
|
"collection with the name provided.",
|
226
219
|
),
|
227
220
|
] = None,
|
228
|
-
|
229
|
-
|
221
|
+
splitter_type: Annotated[
|
222
|
+
str,
|
230
223
|
typer.Option(
|
231
|
-
"-
|
232
|
-
"--
|
224
|
+
"-S",
|
225
|
+
"--splitter",
|
233
226
|
help="Name of custom splitter to use",
|
234
227
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
235
228
|
),
|
236
|
-
] =
|
229
|
+
] = "file",
|
237
230
|
max_tokens: Annotated[
|
238
231
|
int,
|
239
232
|
typer.Option(
|
@@ -265,10 +258,9 @@ def translate(
|
|
265
258
|
max_prompts=max_prompts,
|
266
259
|
max_tokens=max_tokens,
|
267
260
|
prompt_template=prompt_template,
|
268
|
-
parser_type=parser_type,
|
269
261
|
db_path=db_loc,
|
270
262
|
db_config=collections_config,
|
271
|
-
|
263
|
+
splitter_type=splitter_type,
|
272
264
|
)
|
273
265
|
translator.translate(input_dir, output_dir, overwrite, collection)
|
274
266
|
|
@@ -364,15 +356,15 @@ def document(
|
|
364
356
|
"collection with the name provided.",
|
365
357
|
),
|
366
358
|
] = None,
|
367
|
-
|
368
|
-
|
359
|
+
splitter_type: Annotated[
|
360
|
+
str,
|
369
361
|
typer.Option(
|
370
|
-
"-
|
371
|
-
"--
|
362
|
+
"-S",
|
363
|
+
"--splitter",
|
372
364
|
help="Name of custom splitter to use",
|
373
365
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
374
366
|
),
|
375
|
-
] =
|
367
|
+
] = "file",
|
376
368
|
max_tokens: Annotated[
|
377
369
|
int,
|
378
370
|
typer.Option(
|
@@ -393,7 +385,7 @@ def document(
|
|
393
385
|
max_tokens=max_tokens,
|
394
386
|
db_path=db_loc,
|
395
387
|
db_config=collections_config,
|
396
|
-
|
388
|
+
splitter_type=splitter_type,
|
397
389
|
)
|
398
390
|
if doc_mode == "madlibs":
|
399
391
|
documenter = MadLibsDocumenter(
|
@@ -489,15 +481,15 @@ def diagram(
|
|
489
481
|
help="Whether to use documentation in generation",
|
490
482
|
),
|
491
483
|
] = False,
|
492
|
-
|
493
|
-
|
484
|
+
splitter_type: Annotated[
|
485
|
+
str,
|
494
486
|
typer.Option(
|
495
|
-
"-
|
496
|
-
"--
|
487
|
+
"-S",
|
488
|
+
"--splitter",
|
497
489
|
help="Name of custom splitter to use",
|
498
490
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
499
491
|
),
|
500
|
-
] =
|
492
|
+
] = "file",
|
501
493
|
):
|
502
494
|
model_arguments = dict(temperature=temperature)
|
503
495
|
collections_config = get_collections_config()
|
@@ -510,7 +502,7 @@ def diagram(
|
|
510
502
|
db_config=collections_config,
|
511
503
|
diagram_type=diagram_type,
|
512
504
|
add_documentation=add_documentation,
|
513
|
-
|
505
|
+
splitter_type=splitter_type,
|
514
506
|
)
|
515
507
|
diagram_generator.translate(input_dir, output_dir, overwrite, collection)
|
516
508
|
|
File without changes
|
@@ -7,8 +7,11 @@ from langchain.schema import Document
|
|
7
7
|
from langchain.schema.embeddings import Embeddings
|
8
8
|
from langchain.schema.vectorstore import VST, VectorStore
|
9
9
|
|
10
|
-
from
|
11
|
-
|
10
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
11
|
+
|
12
|
+
from ..diagram import DiagramGenerator
|
13
|
+
from ..requirements import RequirementsDocumenter
|
14
|
+
from ..translate import Translator
|
12
15
|
|
13
16
|
# from langchain.vectorstores import Chroma
|
14
17
|
|
@@ -84,14 +87,10 @@ class TestTranslator(unittest.TestCase):
|
|
84
87
|
self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
|
85
88
|
self.TEST_FILE_EMBEDDING_COUNT = 14
|
86
89
|
|
87
|
-
self.req_translator =
|
90
|
+
self.req_translator = RequirementsDocumenter(
|
88
91
|
model="gpt-3.5-turbo-0125",
|
89
|
-
# embeddings_override=MockEmbeddingsFactory(),
|
90
92
|
source_language="fortran",
|
91
|
-
target_language="text",
|
92
|
-
target_version="3.10",
|
93
93
|
prompt_template="requirements",
|
94
|
-
parser_type="text",
|
95
94
|
)
|
96
95
|
|
97
96
|
@pytest.mark.translate
|
@@ -273,11 +272,6 @@ class TestTranslator(unittest.TestCase):
|
|
273
272
|
# print_query_results(QUERY_STRING, n_results)
|
274
273
|
# self.assertTrue(len(n_results) == 1, "Was splitting changed?")
|
275
274
|
|
276
|
-
def test_output_as_requirements(self):
|
277
|
-
"""Is output type requirements?"""
|
278
|
-
self.assertFalse(self.translator.outputting_requirements())
|
279
|
-
self.assertTrue(self.req_translator.outputting_requirements())
|
280
|
-
|
281
275
|
# @pytest.mark.slow
|
282
276
|
# def test_document_embeddings_added_by_translate(self):
|
283
277
|
# vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
|
@@ -310,7 +304,6 @@ class TestTranslator(unittest.TestCase):
|
|
310
304
|
self.assertRaises(
|
311
305
|
ValueError, self.translator.set_target_language, "gobbledy", "goobledy"
|
312
306
|
)
|
313
|
-
self.assertRaises(ValueError, self.translator.set_parser_type, "blah")
|
314
307
|
self.assertRaises(
|
315
308
|
ValueError, self.translator.set_source_language, "scribbledy-doop"
|
316
309
|
)
|
@@ -360,13 +353,12 @@ class TestDiagramGenerator(unittest.TestCase):
|
|
360
353
|
|
361
354
|
|
362
355
|
@pytest.mark.parametrize(
|
363
|
-
"source_language,prompt_template,expected_target_language,expected_target_version,"
|
364
|
-
"parser_type",
|
356
|
+
"source_language,prompt_template,expected_target_language,expected_target_version,",
|
365
357
|
[
|
366
|
-
("python", "document_inline", "python", "3.10"
|
367
|
-
("fortran", "document", "text", None
|
368
|
-
("mumps", "requirements", "text", None
|
369
|
-
("python", "simple", "javascript", "es6"
|
358
|
+
("python", "document_inline", "python", "3.10"),
|
359
|
+
("fortran", "document", "text", None),
|
360
|
+
("mumps", "requirements", "text", None),
|
361
|
+
("python", "simple", "javascript", "es6"),
|
370
362
|
],
|
371
363
|
)
|
372
364
|
def test_language_combinations(
|
@@ -374,7 +366,6 @@ def test_language_combinations(
|
|
374
366
|
prompt_template: str,
|
375
367
|
expected_target_language: str,
|
376
368
|
expected_target_version: str,
|
377
|
-
parser_type: str,
|
378
369
|
):
|
379
370
|
"""Tests that translator target language settings are consistent
|
380
371
|
with prompt template expectations.
|
@@ -383,12 +374,10 @@ def test_language_combinations(
|
|
383
374
|
translator.set_model("gpt-3.5-turbo-0125")
|
384
375
|
translator.set_source_language(source_language)
|
385
376
|
translator.set_target_language(expected_target_language, expected_target_version)
|
386
|
-
translator.set_parser_type(parser_type)
|
387
377
|
translator.set_prompt(prompt_template)
|
388
378
|
translator._load_parameters()
|
389
379
|
assert translator._target_language == expected_target_language # nosec
|
390
380
|
assert translator._target_version == expected_target_version # nosec
|
391
|
-
assert translator._parser_type == parser_type # nosec
|
392
381
|
assert translator._splitter.language == source_language # nosec
|
393
382
|
assert translator._splitter.model.model_name == "gpt-3.5-turbo-0125" # nosec
|
394
383
|
assert translator._prompt_template_name == prompt_template # nosec
|