janus-llm 4.1.0__tar.gz → 4.3.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {janus_llm-4.1.0 → janus_llm-4.3.1}/PKG-INFO +9 -1
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/__init__.py +1 -1
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/cli.py +286 -30
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/__init__.py +1 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/converter.py +46 -47
- janus_llm-4.3.1/janus/converter/evaluate.py +241 -0
- janus_llm-4.3.1/janus/converter/partition.py +27 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/alc/_tests/test_alc.py +1 -1
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/alc/alc.py +9 -4
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/combine.py +22 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/splitter.py +31 -23
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/treesitter/treesitter.py +9 -1
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/llm/models_info.py +20 -12
- janus_llm-4.3.1/janus/parsers/eval_parsers/incose_parser.py +134 -0
- janus_llm-4.3.1/janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
- janus_llm-4.3.1/janus/parsers/partition_parser.py +168 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/refiners/refiner.py +38 -12
- janus_llm-4.3.1/janus/refiners/uml.py +33 -0
- janus_llm-4.3.1/janus/retrievers/retriever.py +102 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/enums.py +14 -0
- janus_llm-4.3.1/janus/utils/pdf_docs_reader.py +134 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/pyproject.toml +9 -1
- janus_llm-4.1.0/janus/converter/evaluate.py +0 -15
- janus_llm-4.1.0/janus/retrievers/retriever.py +0 -42
- {janus_llm-4.1.0 → janus_llm-4.3.1}/LICENSE +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/README.md +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/__main__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/_tests/conftest.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/_tests/test_cli.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/_tests/test_translate.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/aggregator.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/diagram.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/document.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/requirements.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/translate.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_collections.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_database.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_vectorize.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/collections.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/database.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/embedding_models_info.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/vectorize.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/_tests/test_combine.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/_tests/test_splitter.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/alc/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/alc/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/_tests/test_binary.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/binary.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/reveng/decompile_script.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/block.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/file.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/_tests/test_mumps.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/mumps.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/patterns.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/basic_splitter.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/chunk_splitter.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/registry.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/simple_ast.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/tag_splitter.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/node.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/treesitter/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/treesitter/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/treesitter/_tests/test_treesitter.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/llm/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/llm/model_callbacks.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/reference.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/target.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_bleu.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_chrf.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_file_pairing.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_llm.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_reading.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_rouge_score.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_similarity_score.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_treesitter_metrics.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/bleu.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/chrf.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/cli.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/complexity_metrics.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/file_pairing.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/llm_metrics.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/metric.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/reading.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/rouge_score.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/similarity.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/splitting.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/_tests/test_code_parser.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/code_parser.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/doc_parser.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/eval_parser.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/parser.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/reqs_parser.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/uml.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/prompts/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/prompts/prompt.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/_tests/__init__.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/_tests/test_logger.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/_tests/test_progress.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/logger.py +0 -0
- {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/progress.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: janus-llm
|
3
|
-
Version: 4.1.0
|
3
|
+
Version: 4.3.1
|
4
4
|
Summary: A transcoding library using LLMs.
|
5
5
|
Home-page: https://github.com/janus-llm/janus-llm
|
6
6
|
License: Apache 2.0
|
@@ -23,20 +23,28 @@ Requires-Dist: langchain-anthropic (>=0.1.15,<0.2.0)
|
|
23
23
|
Requires-Dist: langchain-community (>=0.2.0,<0.3.0)
|
24
24
|
Requires-Dist: langchain-core (>=0.2.0,<0.3.0)
|
25
25
|
Requires-Dist: langchain-openai (>=0.1.8,<0.2.0)
|
26
|
+
Requires-Dist: langchain-unstructured (>=0.1.2,<0.2.0)
|
26
27
|
Requires-Dist: nltk (>=3.8.1,<4.0.0)
|
27
28
|
Requires-Dist: numpy (>=1.24.3,<2.0.0)
|
28
29
|
Requires-Dist: openai (>=1.14.0,<2.0.0)
|
30
|
+
Requires-Dist: pi-heif (>=0.20.0,<0.21.0)
|
29
31
|
Requires-Dist: py-readability-metrics (>=1.4.5,<2.0.0)
|
30
32
|
Requires-Dist: py-rouge (>=1.1,<2.0)
|
33
|
+
Requires-Dist: pytesseract (>=0.3.13,<0.4.0)
|
31
34
|
Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
|
32
35
|
Requires-Dist: rich (>=13.7.1,<14.0.0)
|
33
36
|
Requires-Dist: sacrebleu (>=2.4.1,<3.0.0)
|
37
|
+
Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
|
34
38
|
Requires-Dist: sentence-transformers (>=2.6.1,<3.0.0) ; extra == "hf-local" or extra == "all"
|
39
|
+
Requires-Dist: tesseract (>=0.1.3,<0.2.0)
|
35
40
|
Requires-Dist: text-generation (>=0.6.0,<0.7.0)
|
36
41
|
Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
|
37
42
|
Requires-Dist: transformers (>=4.31.0,<5.0.0)
|
38
43
|
Requires-Dist: tree-sitter (>=0.21.0,<0.22.0)
|
39
44
|
Requires-Dist: typer (>=0.9.0,<0.10.0)
|
45
|
+
Requires-Dist: unstructured (>=0.15.9,<0.16.0)
|
46
|
+
Requires-Dist: unstructured-inference (>=0.7.36,<0.8.0)
|
47
|
+
Requires-Dist: unstructured-pytesseract (>=0.3.13,<0.4.0)
|
40
48
|
Project-URL: Documentation, https://janus-llm.github.io/janus-llm
|
41
49
|
Project-URL: Repository, https://github.com/janus-llm/janus-llm
|
42
50
|
Description-Content-Type: text/markdown
|
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
|
|
5
5
|
from janus.converter.translate import Translator
|
6
6
|
from janus.metrics import * # noqa: F403
|
7
7
|
|
8
|
-
__version__ = "4.1.0"
|
8
|
+
__version__ = "4.3.1"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
@@ -13,10 +13,14 @@ from rich.console import Console
|
|
13
13
|
from rich.prompt import Confirm
|
14
14
|
from typing_extensions import Annotated
|
15
15
|
|
16
|
+
import janus.refiners.refiner
|
17
|
+
import janus.refiners.uml
|
16
18
|
from janus.converter.aggregator import Aggregator
|
17
19
|
from janus.converter.converter import Converter
|
18
20
|
from janus.converter.diagram import DiagramGenerator
|
19
21
|
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
22
|
+
from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator
|
23
|
+
from janus.converter.partition import Partitioner
|
20
24
|
from janus.converter.requirements import RequirementsDocumenter
|
21
25
|
from janus.converter.translate import Translator
|
22
26
|
from janus.embedding.collections import Collections
|
@@ -44,7 +48,6 @@ from janus.llm.models_info import (
|
|
44
48
|
openai_models,
|
45
49
|
)
|
46
50
|
from janus.metrics.cli import evaluate
|
47
|
-
from janus.refiners.refiner import REFINERS
|
48
51
|
from janus.utils.enums import LANGUAGES
|
49
52
|
from janus.utils.logger import create_logger
|
50
53
|
|
@@ -69,6 +72,18 @@ with open(db_file, "r") as f:
|
|
69
72
|
collections_config_file = Path(db_loc) / "collections.json"
|
70
73
|
|
71
74
|
|
75
|
+
def get_subclasses(cls):
|
76
|
+
return set(cls.__subclasses__()).union(
|
77
|
+
set(s for c in cls.__subclasses__() for s in get_subclasses(c))
|
78
|
+
)
|
79
|
+
|
80
|
+
|
81
|
+
REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
|
82
|
+
{janus.refiners.refiner.JanusRefiner}
|
83
|
+
)
|
84
|
+
REFINERS = {r.__name__: r for r in REFINER_TYPES}
|
85
|
+
|
86
|
+
|
72
87
|
def get_collections_config():
|
73
88
|
if collections_config_file.exists():
|
74
89
|
with open(collections_config_file, "r") as f:
|
@@ -113,7 +128,7 @@ embedding = typer.Typer(
|
|
113
128
|
|
114
129
|
def version_callback(value: bool) -> None:
|
115
130
|
if value:
|
116
|
-
from
|
131
|
+
from . import __version__ as version
|
117
132
|
|
118
133
|
print(f"Janus CLI [blue]v{version}[/blue]")
|
119
134
|
raise typer.Exit()
|
@@ -244,22 +259,23 @@ def translate(
|
|
244
259
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
245
260
|
),
|
246
261
|
] = "file",
|
247
|
-
|
248
|
-
str,
|
262
|
+
refiner_types: Annotated[
|
263
|
+
list[str],
|
249
264
|
typer.Option(
|
250
265
|
"-r",
|
251
266
|
"--refiner",
|
252
|
-
help="
|
267
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
268
|
+
refinement chain",
|
253
269
|
click_type=click.Choice(list(REFINERS.keys())),
|
254
270
|
),
|
255
|
-
] = "
|
271
|
+
] = ["JanusRefiner"],
|
256
272
|
retriever_type: Annotated[
|
257
273
|
str,
|
258
274
|
typer.Option(
|
259
275
|
"-R",
|
260
276
|
"--retriever",
|
261
277
|
help="Name of custom retriever to use",
|
262
|
-
click_type=click.Choice(["active_usings"]),
|
278
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
263
279
|
),
|
264
280
|
] = None,
|
265
281
|
max_tokens: Annotated[
|
@@ -272,6 +288,7 @@ def translate(
|
|
272
288
|
),
|
273
289
|
] = None,
|
274
290
|
):
|
291
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
275
292
|
try:
|
276
293
|
target_language, target_version = target_lang.split("-")
|
277
294
|
except ValueError:
|
@@ -296,7 +313,7 @@ def translate(
|
|
296
313
|
db_path=db_loc,
|
297
314
|
db_config=collections_config,
|
298
315
|
splitter_type=splitter_type,
|
299
|
-
|
316
|
+
refiner_types=refiner_types,
|
300
317
|
retriever_type=retriever_type,
|
301
318
|
)
|
302
319
|
translator.translate(input_dir, output_dir, overwrite, collection)
|
@@ -402,22 +419,23 @@ def document(
|
|
402
419
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
403
420
|
),
|
404
421
|
] = "file",
|
405
|
-
|
406
|
-
str,
|
422
|
+
refiner_types: Annotated[
|
423
|
+
list[str],
|
407
424
|
typer.Option(
|
408
425
|
"-r",
|
409
426
|
"--refiner",
|
410
|
-
help="
|
427
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
428
|
+
refinement chain",
|
411
429
|
click_type=click.Choice(list(REFINERS.keys())),
|
412
430
|
),
|
413
|
-
] = "
|
431
|
+
] = ["JanusRefiner"],
|
414
432
|
retriever_type: Annotated[
|
415
433
|
str,
|
416
434
|
typer.Option(
|
417
435
|
"-R",
|
418
436
|
"--retriever",
|
419
437
|
help="Name of custom retriever to use",
|
420
|
-
click_type=click.Choice(["active_usings"]),
|
438
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
421
439
|
),
|
422
440
|
] = None,
|
423
441
|
max_tokens: Annotated[
|
@@ -430,6 +448,7 @@ def document(
|
|
430
448
|
),
|
431
449
|
] = None,
|
432
450
|
):
|
451
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
433
452
|
model_arguments = dict(temperature=temperature)
|
434
453
|
collections_config = get_collections_config()
|
435
454
|
kwargs = dict(
|
@@ -441,7 +460,7 @@ def document(
|
|
441
460
|
db_path=db_loc,
|
442
461
|
db_config=collections_config,
|
443
462
|
splitter_type=splitter_type,
|
444
|
-
|
463
|
+
refiner_types=refiner_types,
|
445
464
|
retriever_type=retriever_type,
|
446
465
|
)
|
447
466
|
if doc_mode == "madlibs":
|
@@ -458,12 +477,6 @@ def document(
|
|
458
477
|
documenter.translate(input_dir, output_dir, overwrite, collection)
|
459
478
|
|
460
479
|
|
461
|
-
def get_subclasses(cls):
|
462
|
-
return set(cls.__subclasses__()).union(
|
463
|
-
set(s for c in cls.__subclasses__() for s in get_subclasses(c))
|
464
|
-
)
|
465
|
-
|
466
|
-
|
467
480
|
@app.command()
|
468
481
|
def aggregate(
|
469
482
|
input_dir: Annotated[
|
@@ -578,6 +591,115 @@ def aggregate(
|
|
578
591
|
aggregator.translate(input_dir, output_dir, overwrite, collection)
|
579
592
|
|
580
593
|
|
594
|
+
@app.command(
|
595
|
+
help="Partition input code using an LLM.",
|
596
|
+
no_args_is_help=True,
|
597
|
+
)
|
598
|
+
def partition(
|
599
|
+
input_dir: Annotated[
|
600
|
+
Path,
|
601
|
+
typer.Option(
|
602
|
+
"--input",
|
603
|
+
"-i",
|
604
|
+
help="The directory containing the source code to be partitioned. ",
|
605
|
+
),
|
606
|
+
],
|
607
|
+
language: Annotated[
|
608
|
+
str,
|
609
|
+
typer.Option(
|
610
|
+
"--language",
|
611
|
+
"-l",
|
612
|
+
help="The language of the source code.",
|
613
|
+
click_type=click.Choice(sorted(LANGUAGES)),
|
614
|
+
),
|
615
|
+
],
|
616
|
+
output_dir: Annotated[
|
617
|
+
Path,
|
618
|
+
typer.Option(
|
619
|
+
"--output-dir", "-o", help="The directory to store the partitioned code in."
|
620
|
+
),
|
621
|
+
],
|
622
|
+
llm_name: Annotated[
|
623
|
+
str,
|
624
|
+
typer.Option(
|
625
|
+
"--llm",
|
626
|
+
"-L",
|
627
|
+
help="The custom name of the model set with 'janus llm add'.",
|
628
|
+
),
|
629
|
+
] = "gpt-4o",
|
630
|
+
max_prompts: Annotated[
|
631
|
+
int,
|
632
|
+
typer.Option(
|
633
|
+
"--max-prompts",
|
634
|
+
"-m",
|
635
|
+
help="The maximum number of times to prompt a model on one functional block "
|
636
|
+
"before exiting the application. This is to prevent wasting too much money.",
|
637
|
+
),
|
638
|
+
] = 10,
|
639
|
+
overwrite: Annotated[
|
640
|
+
bool,
|
641
|
+
typer.Option(
|
642
|
+
"--overwrite/--preserve",
|
643
|
+
help="Whether to overwrite existing files in the output directory",
|
644
|
+
),
|
645
|
+
] = False,
|
646
|
+
temperature: Annotated[
|
647
|
+
float,
|
648
|
+
typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
|
649
|
+
] = 0.7,
|
650
|
+
splitter_type: Annotated[
|
651
|
+
str,
|
652
|
+
typer.Option(
|
653
|
+
"-S",
|
654
|
+
"--splitter",
|
655
|
+
help="Name of custom splitter to use",
|
656
|
+
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
657
|
+
),
|
658
|
+
] = "file",
|
659
|
+
refiner_types: Annotated[
|
660
|
+
list[str],
|
661
|
+
typer.Option(
|
662
|
+
"-r",
|
663
|
+
"--refiner",
|
664
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
665
|
+
refinement chain",
|
666
|
+
click_type=click.Choice(list(REFINERS.keys())),
|
667
|
+
),
|
668
|
+
] = ["JanusRefiner"],
|
669
|
+
max_tokens: Annotated[
|
670
|
+
int,
|
671
|
+
typer.Option(
|
672
|
+
"--max-tokens",
|
673
|
+
"-M",
|
674
|
+
help="The maximum number of tokens the model will take in. "
|
675
|
+
"If unspecificed, model's default max will be used.",
|
676
|
+
),
|
677
|
+
] = None,
|
678
|
+
partition_token_limit: Annotated[
|
679
|
+
int,
|
680
|
+
typer.Option(
|
681
|
+
"--partition-tokens",
|
682
|
+
"-pt",
|
683
|
+
help="The limit on the number of tokens per partition.",
|
684
|
+
),
|
685
|
+
] = 8192,
|
686
|
+
):
|
687
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
688
|
+
model_arguments = dict(temperature=temperature)
|
689
|
+
kwargs = dict(
|
690
|
+
model=llm_name,
|
691
|
+
model_arguments=model_arguments,
|
692
|
+
source_language=language,
|
693
|
+
max_prompts=max_prompts,
|
694
|
+
max_tokens=max_tokens,
|
695
|
+
splitter_type=splitter_type,
|
696
|
+
refiner_types=refiner_types,
|
697
|
+
partition_token_limit=partition_token_limit,
|
698
|
+
)
|
699
|
+
partitioner = Partitioner(**kwargs)
|
700
|
+
partitioner.translate(input_dir, output_dir, overwrite)
|
701
|
+
|
702
|
+
|
581
703
|
@app.command(
|
582
704
|
help="Diagram input code using an LLM.",
|
583
705
|
no_args_is_help=True,
|
@@ -667,25 +789,27 @@ def diagram(
|
|
667
789
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
668
790
|
),
|
669
791
|
] = "file",
|
670
|
-
|
671
|
-
str,
|
792
|
+
refiner_types: Annotated[
|
793
|
+
list[str],
|
672
794
|
typer.Option(
|
673
795
|
"-r",
|
674
796
|
"--refiner",
|
675
|
-
help="
|
797
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
798
|
+
refinement chain",
|
676
799
|
click_type=click.Choice(list(REFINERS.keys())),
|
677
800
|
),
|
678
|
-
] = "
|
801
|
+
] = ["JanusRefiner"],
|
679
802
|
retriever_type: Annotated[
|
680
803
|
str,
|
681
804
|
typer.Option(
|
682
805
|
"-R",
|
683
806
|
"--retriever",
|
684
807
|
help="Name of custom retriever to use",
|
685
|
-
click_type=click.Choice(["active_usings"]),
|
808
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
686
809
|
),
|
687
810
|
] = None,
|
688
811
|
):
|
812
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
689
813
|
model_arguments = dict(temperature=temperature)
|
690
814
|
collections_config = get_collections_config()
|
691
815
|
diagram_generator = DiagramGenerator(
|
@@ -696,7 +820,7 @@ def diagram(
|
|
696
820
|
db_path=db_loc,
|
697
821
|
db_config=collections_config,
|
698
822
|
splitter_type=splitter_type,
|
699
|
-
|
823
|
+
refiner_types=refiner_types,
|
700
824
|
retriever_type=retriever_type,
|
701
825
|
diagram_type=diagram_type,
|
702
826
|
add_documentation=add_documentation,
|
@@ -704,6 +828,139 @@ def diagram(
|
|
704
828
|
diagram_generator.translate(input_dir, output_dir, overwrite, collection)
|
705
829
|
|
706
830
|
|
831
|
+
@app.command(
|
832
|
+
help="LLM self evaluation",
|
833
|
+
no_args_is_help=True,
|
834
|
+
)
|
835
|
+
def llm_self_eval(
|
836
|
+
input_dir: Annotated[
|
837
|
+
Path,
|
838
|
+
typer.Option(
|
839
|
+
"--input",
|
840
|
+
"-i",
|
841
|
+
help="The directory containing the source code to be evaluated. "
|
842
|
+
"The files should all be in one flat directory.",
|
843
|
+
),
|
844
|
+
],
|
845
|
+
language: Annotated[
|
846
|
+
str,
|
847
|
+
typer.Option(
|
848
|
+
"--language",
|
849
|
+
"-l",
|
850
|
+
help="The language of the source code.",
|
851
|
+
click_type=click.Choice(sorted(LANGUAGES)),
|
852
|
+
),
|
853
|
+
],
|
854
|
+
output_dir: Annotated[
|
855
|
+
Path,
|
856
|
+
typer.Option(
|
857
|
+
"--output-dir", "-o", help="The directory to store the evaluations in."
|
858
|
+
),
|
859
|
+
],
|
860
|
+
llm_name: Annotated[
|
861
|
+
str,
|
862
|
+
typer.Option(
|
863
|
+
"--llm",
|
864
|
+
"-L",
|
865
|
+
help="The custom name of the model set with 'janus llm add'.",
|
866
|
+
),
|
867
|
+
] = "gpt-4o",
|
868
|
+
evaluation_type: Annotated[
|
869
|
+
str,
|
870
|
+
typer.Option(
|
871
|
+
"--evaluation-type",
|
872
|
+
"-e",
|
873
|
+
help="Type of output to evaluate.",
|
874
|
+
click_type=click.Choice(["incose", "comments"]),
|
875
|
+
),
|
876
|
+
] = "incose",
|
877
|
+
max_prompts: Annotated[
|
878
|
+
int,
|
879
|
+
typer.Option(
|
880
|
+
"--max-prompts",
|
881
|
+
"-m",
|
882
|
+
help="The maximum number of times to prompt a model on one functional block "
|
883
|
+
"before exiting the application. This is to prevent wasting too much money.",
|
884
|
+
),
|
885
|
+
] = 10,
|
886
|
+
overwrite: Annotated[
|
887
|
+
bool,
|
888
|
+
typer.Option(
|
889
|
+
"--overwrite/--preserve",
|
890
|
+
help="Whether to overwrite existing files in the output directory",
|
891
|
+
),
|
892
|
+
] = False,
|
893
|
+
temperature: Annotated[
|
894
|
+
float,
|
895
|
+
typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
|
896
|
+
] = 0.7,
|
897
|
+
collection: Annotated[
|
898
|
+
str,
|
899
|
+
typer.Option(
|
900
|
+
"--collection",
|
901
|
+
"-c",
|
902
|
+
help="If set, will put the translated result into a Chroma DB "
|
903
|
+
"collection with the name provided.",
|
904
|
+
),
|
905
|
+
] = None,
|
906
|
+
splitter_type: Annotated[
|
907
|
+
str,
|
908
|
+
typer.Option(
|
909
|
+
"-S",
|
910
|
+
"--splitter",
|
911
|
+
help="Name of custom splitter to use",
|
912
|
+
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
913
|
+
),
|
914
|
+
] = "file",
|
915
|
+
refiner_types: Annotated[
|
916
|
+
list[str],
|
917
|
+
typer.Option(
|
918
|
+
"-r",
|
919
|
+
"--refiner",
|
920
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
921
|
+
refinement chain",
|
922
|
+
click_type=click.Choice(list(REFINERS.keys())),
|
923
|
+
),
|
924
|
+
] = ["JanusRefiner"],
|
925
|
+
eval_items_per_request: Annotated[
|
926
|
+
int,
|
927
|
+
typer.Option(
|
928
|
+
"--eval-items-per-request",
|
929
|
+
"-rc",
|
930
|
+
help="The maximum number of evaluation items per request",
|
931
|
+
),
|
932
|
+
] = None,
|
933
|
+
max_tokens: Annotated[
|
934
|
+
int,
|
935
|
+
typer.Option(
|
936
|
+
"--max-tokens",
|
937
|
+
"-M",
|
938
|
+
help="The maximum number of tokens the model will take in. "
|
939
|
+
"If unspecificed, model's default max will be used.",
|
940
|
+
),
|
941
|
+
] = None,
|
942
|
+
):
|
943
|
+
model_arguments = dict(temperature=temperature)
|
944
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
945
|
+
kwargs = dict(
|
946
|
+
eval_items_per_request=eval_items_per_request,
|
947
|
+
model=llm_name,
|
948
|
+
model_arguments=model_arguments,
|
949
|
+
source_language=language,
|
950
|
+
max_prompts=max_prompts,
|
951
|
+
max_tokens=max_tokens,
|
952
|
+
splitter_type=splitter_type,
|
953
|
+
refiner_types=refiner_types,
|
954
|
+
)
|
955
|
+
# Setting parser type here
|
956
|
+
if evaluation_type == "incose":
|
957
|
+
evaluator = RequirementEvaluator(**kwargs)
|
958
|
+
elif evaluation_type == "comments":
|
959
|
+
evaluator = InlineCommentEvaluator(**kwargs)
|
960
|
+
|
961
|
+
evaluator.translate(input_dir, output_dir, overwrite, collection)
|
962
|
+
|
963
|
+
|
707
964
|
@db.command("init", help="Connect to or create a database.")
|
708
965
|
def db_init(
|
709
966
|
path: Annotated[
|
@@ -1005,13 +1262,12 @@ def llm_add(
|
|
1005
1262
|
show_choices=False,
|
1006
1263
|
)
|
1007
1264
|
params = dict(
|
1008
|
-
|
1009
|
-
model_name=MODEL_ID_TO_LONG_ID[model_id],
|
1265
|
+
model_name=model_name,
|
1010
1266
|
temperature=0.7,
|
1011
1267
|
n=1,
|
1012
1268
|
)
|
1013
|
-
max_tokens = TOKEN_LIMITS[
|
1014
|
-
model_cost = COST_PER_1K_TOKENS[
|
1269
|
+
max_tokens = TOKEN_LIMITS[model_name]
|
1270
|
+
model_cost = COST_PER_1K_TOKENS[model_name]
|
1015
1271
|
cfg = {
|
1016
1272
|
"model_type": model_type,
|
1017
1273
|
"model_id": model_id,
|
@@ -2,5 +2,6 @@ from janus.converter.converter import Converter
|
|
2
2
|
from janus.converter.diagram import DiagramGenerator
|
3
3
|
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
4
4
|
from janus.converter.evaluate import Evaluator
|
5
|
+
from janus.converter.partition import Partitioner
|
5
6
|
from janus.converter.requirements import RequirementsDocumenter
|
6
7
|
from janus.converter.translate import Translator
|