janus-llm 4.0.0__py3-none-any.whl → 4.2.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +1 -1
- janus/cli.py +161 -26
- janus/converter/__init__.py +1 -0
- janus/converter/_tests/test_translate.py +2 -2
- janus/converter/converter.py +45 -47
- janus/converter/partition.py +27 -0
- janus/language/combine.py +22 -0
- janus/llm/model_callbacks.py +9 -0
- janus/llm/models_info.py +41 -17
- janus/parsers/partition_parser.py +136 -0
- janus/refiners/refiner.py +8 -12
- janus/refiners/uml.py +33 -0
- janus/retrievers/retriever.py +60 -0
- janus/utils/pdf_docs_reader.py +134 -0
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/METADATA +9 -1
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/RECORD +19 -15
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/WHEEL +1 -1
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/LICENSE +0 -0
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
|
|
5
5
|
from janus.converter.translate import Translator
|
6
6
|
from janus.metrics import * # noqa: F403
|
7
7
|
|
8
|
-
__version__ = "4.0.0"
|
8
|
+
__version__ = "4.2.0"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/cli.py
CHANGED
@@ -13,10 +13,13 @@ from rich.console import Console
|
|
13
13
|
from rich.prompt import Confirm
|
14
14
|
from typing_extensions import Annotated
|
15
15
|
|
16
|
+
import janus.refiners.refiner
|
17
|
+
import janus.refiners.uml
|
16
18
|
from janus.converter.aggregator import Aggregator
|
17
19
|
from janus.converter.converter import Converter
|
18
20
|
from janus.converter.diagram import DiagramGenerator
|
19
21
|
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
22
|
+
from janus.converter.partition import Partitioner
|
20
23
|
from janus.converter.requirements import RequirementsDocumenter
|
21
24
|
from janus.converter.translate import Translator
|
22
25
|
from janus.embedding.collections import Collections
|
@@ -39,11 +42,11 @@ from janus.llm.models_info import (
|
|
39
42
|
MODEL_TYPE_CONSTRUCTORS,
|
40
43
|
MODEL_TYPES,
|
41
44
|
TOKEN_LIMITS,
|
45
|
+
azure_models,
|
42
46
|
bedrock_models,
|
43
47
|
openai_models,
|
44
48
|
)
|
45
49
|
from janus.metrics.cli import evaluate
|
46
|
-
from janus.refiners.refiner import REFINERS
|
47
50
|
from janus.utils.enums import LANGUAGES
|
48
51
|
from janus.utils.logger import create_logger
|
49
52
|
|
@@ -68,6 +71,18 @@ with open(db_file, "r") as f:
|
|
68
71
|
collections_config_file = Path(db_loc) / "collections.json"
|
69
72
|
|
70
73
|
|
74
|
+
def get_subclasses(cls):
|
75
|
+
return set(cls.__subclasses__()).union(
|
76
|
+
set(s for c in cls.__subclasses__() for s in get_subclasses(c))
|
77
|
+
)
|
78
|
+
|
79
|
+
|
80
|
+
REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
|
81
|
+
{janus.refiners.refiner.JanusRefiner}
|
82
|
+
)
|
83
|
+
REFINERS = {r.__name__: r for r in REFINER_TYPES}
|
84
|
+
|
85
|
+
|
71
86
|
def get_collections_config():
|
72
87
|
if collections_config_file.exists():
|
73
88
|
with open(collections_config_file, "r") as f:
|
@@ -243,22 +258,23 @@ def translate(
|
|
243
258
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
244
259
|
),
|
245
260
|
] = "file",
|
246
|
-
|
247
|
-
str,
|
261
|
+
refiner_types: Annotated[
|
262
|
+
list[str],
|
248
263
|
typer.Option(
|
249
264
|
"-r",
|
250
265
|
"--refiner",
|
251
|
-
help="
|
266
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
267
|
+
refinement chain",
|
252
268
|
click_type=click.Choice(list(REFINERS.keys())),
|
253
269
|
),
|
254
|
-
] = "
|
270
|
+
] = ["JanusRefiner"],
|
255
271
|
retriever_type: Annotated[
|
256
272
|
str,
|
257
273
|
typer.Option(
|
258
274
|
"-R",
|
259
275
|
"--retriever",
|
260
276
|
help="Name of custom retriever to use",
|
261
|
-
click_type=click.Choice(["active_usings"]),
|
277
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
262
278
|
),
|
263
279
|
] = None,
|
264
280
|
max_tokens: Annotated[
|
@@ -271,6 +287,7 @@ def translate(
|
|
271
287
|
),
|
272
288
|
] = None,
|
273
289
|
):
|
290
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
274
291
|
try:
|
275
292
|
target_language, target_version = target_lang.split("-")
|
276
293
|
except ValueError:
|
@@ -295,7 +312,7 @@ def translate(
|
|
295
312
|
db_path=db_loc,
|
296
313
|
db_config=collections_config,
|
297
314
|
splitter_type=splitter_type,
|
298
|
-
|
315
|
+
refiner_types=refiner_types,
|
299
316
|
retriever_type=retriever_type,
|
300
317
|
)
|
301
318
|
translator.translate(input_dir, output_dir, overwrite, collection)
|
@@ -401,22 +418,23 @@ def document(
|
|
401
418
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
402
419
|
),
|
403
420
|
] = "file",
|
404
|
-
|
405
|
-
str,
|
421
|
+
refiner_types: Annotated[
|
422
|
+
list[str],
|
406
423
|
typer.Option(
|
407
424
|
"-r",
|
408
425
|
"--refiner",
|
409
|
-
help="
|
426
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
427
|
+
refinement chain",
|
410
428
|
click_type=click.Choice(list(REFINERS.keys())),
|
411
429
|
),
|
412
|
-
] = "
|
430
|
+
] = ["JanusRefiner"],
|
413
431
|
retriever_type: Annotated[
|
414
432
|
str,
|
415
433
|
typer.Option(
|
416
434
|
"-R",
|
417
435
|
"--retriever",
|
418
436
|
help="Name of custom retriever to use",
|
419
|
-
click_type=click.Choice(["active_usings"]),
|
437
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
420
438
|
),
|
421
439
|
] = None,
|
422
440
|
max_tokens: Annotated[
|
@@ -429,6 +447,7 @@ def document(
|
|
429
447
|
),
|
430
448
|
] = None,
|
431
449
|
):
|
450
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
432
451
|
model_arguments = dict(temperature=temperature)
|
433
452
|
collections_config = get_collections_config()
|
434
453
|
kwargs = dict(
|
@@ -440,7 +459,7 @@ def document(
|
|
440
459
|
db_path=db_loc,
|
441
460
|
db_config=collections_config,
|
442
461
|
splitter_type=splitter_type,
|
443
|
-
|
462
|
+
refiner_types=refiner_types,
|
444
463
|
retriever_type=retriever_type,
|
445
464
|
)
|
446
465
|
if doc_mode == "madlibs":
|
@@ -457,12 +476,6 @@ def document(
|
|
457
476
|
documenter.translate(input_dir, output_dir, overwrite, collection)
|
458
477
|
|
459
478
|
|
460
|
-
def get_subclasses(cls):
|
461
|
-
return set(cls.__subclasses__()).union(
|
462
|
-
set(s for c in cls.__subclasses__() for s in get_subclasses(c))
|
463
|
-
)
|
464
|
-
|
465
|
-
|
466
479
|
@app.command()
|
467
480
|
def aggregate(
|
468
481
|
input_dir: Annotated[
|
@@ -577,6 +590,103 @@ def aggregate(
|
|
577
590
|
aggregator.translate(input_dir, output_dir, overwrite, collection)
|
578
591
|
|
579
592
|
|
593
|
+
@app.command(
|
594
|
+
help="Partition input code using an LLM.",
|
595
|
+
no_args_is_help=True,
|
596
|
+
)
|
597
|
+
def partition(
|
598
|
+
input_dir: Annotated[
|
599
|
+
Path,
|
600
|
+
typer.Option(
|
601
|
+
"--input",
|
602
|
+
"-i",
|
603
|
+
help="The directory containing the source code to be partitioned. ",
|
604
|
+
),
|
605
|
+
],
|
606
|
+
language: Annotated[
|
607
|
+
str,
|
608
|
+
typer.Option(
|
609
|
+
"--language",
|
610
|
+
"-l",
|
611
|
+
help="The language of the source code.",
|
612
|
+
click_type=click.Choice(sorted(LANGUAGES)),
|
613
|
+
),
|
614
|
+
],
|
615
|
+
output_dir: Annotated[
|
616
|
+
Path,
|
617
|
+
typer.Option(
|
618
|
+
"--output-dir", "-o", help="The directory to store the partitioned code in."
|
619
|
+
),
|
620
|
+
],
|
621
|
+
llm_name: Annotated[
|
622
|
+
str,
|
623
|
+
typer.Option(
|
624
|
+
"--llm",
|
625
|
+
"-L",
|
626
|
+
help="The custom name of the model set with 'janus llm add'.",
|
627
|
+
),
|
628
|
+
] = "gpt-4o",
|
629
|
+
max_prompts: Annotated[
|
630
|
+
int,
|
631
|
+
typer.Option(
|
632
|
+
"--max-prompts",
|
633
|
+
"-m",
|
634
|
+
help="The maximum number of times to prompt a model on one functional block "
|
635
|
+
"before exiting the application. This is to prevent wasting too much money.",
|
636
|
+
),
|
637
|
+
] = 10,
|
638
|
+
overwrite: Annotated[
|
639
|
+
bool,
|
640
|
+
typer.Option(
|
641
|
+
"--overwrite/--preserve",
|
642
|
+
help="Whether to overwrite existing files in the output directory",
|
643
|
+
),
|
644
|
+
] = False,
|
645
|
+
temperature: Annotated[
|
646
|
+
float,
|
647
|
+
typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
|
648
|
+
] = 0.7,
|
649
|
+
splitter_type: Annotated[
|
650
|
+
str,
|
651
|
+
typer.Option(
|
652
|
+
"-S",
|
653
|
+
"--splitter",
|
654
|
+
help="Name of custom splitter to use",
|
655
|
+
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
656
|
+
),
|
657
|
+
] = "file",
|
658
|
+
max_tokens: Annotated[
|
659
|
+
int,
|
660
|
+
typer.Option(
|
661
|
+
"--max-tokens",
|
662
|
+
"-M",
|
663
|
+
help="The maximum number of tokens the model will take in. "
|
664
|
+
"If unspecificed, model's default max will be used.",
|
665
|
+
),
|
666
|
+
] = None,
|
667
|
+
partition_token_limit: Annotated[
|
668
|
+
int,
|
669
|
+
typer.Option(
|
670
|
+
"--partition-tokens",
|
671
|
+
"-pt",
|
672
|
+
help="The limit on the number of tokens per partition.",
|
673
|
+
),
|
674
|
+
] = 8192,
|
675
|
+
):
|
676
|
+
model_arguments = dict(temperature=temperature)
|
677
|
+
kwargs = dict(
|
678
|
+
model=llm_name,
|
679
|
+
model_arguments=model_arguments,
|
680
|
+
source_language=language,
|
681
|
+
max_prompts=max_prompts,
|
682
|
+
max_tokens=max_tokens,
|
683
|
+
splitter_type=splitter_type,
|
684
|
+
partition_token_limit=partition_token_limit,
|
685
|
+
)
|
686
|
+
partitioner = Partitioner(**kwargs)
|
687
|
+
partitioner.translate(input_dir, output_dir, overwrite)
|
688
|
+
|
689
|
+
|
580
690
|
@app.command(
|
581
691
|
help="Diagram input code using an LLM.",
|
582
692
|
no_args_is_help=True,
|
@@ -666,25 +776,27 @@ def diagram(
|
|
666
776
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
667
777
|
),
|
668
778
|
] = "file",
|
669
|
-
|
670
|
-
str,
|
779
|
+
refiner_types: Annotated[
|
780
|
+
list[str],
|
671
781
|
typer.Option(
|
672
782
|
"-r",
|
673
783
|
"--refiner",
|
674
|
-
help="
|
784
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
785
|
+
refinement chain",
|
675
786
|
click_type=click.Choice(list(REFINERS.keys())),
|
676
787
|
),
|
677
|
-
] = "
|
788
|
+
] = ["JanusRefiner"],
|
678
789
|
retriever_type: Annotated[
|
679
790
|
str,
|
680
791
|
typer.Option(
|
681
792
|
"-R",
|
682
793
|
"--retriever",
|
683
794
|
help="Name of custom retriever to use",
|
684
|
-
click_type=click.Choice(["active_usings"]),
|
795
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
685
796
|
),
|
686
797
|
] = None,
|
687
798
|
):
|
799
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
688
800
|
model_arguments = dict(temperature=temperature)
|
689
801
|
collections_config = get_collections_config()
|
690
802
|
diagram_generator = DiagramGenerator(
|
@@ -695,7 +807,7 @@ def diagram(
|
|
695
807
|
db_path=db_loc,
|
696
808
|
db_config=collections_config,
|
697
809
|
splitter_type=splitter_type,
|
698
|
-
|
810
|
+
refiner_types=refiner_types,
|
699
811
|
retriever_type=retriever_type,
|
700
812
|
diagram_type=diagram_type,
|
701
813
|
add_documentation=add_documentation,
|
@@ -952,7 +1064,7 @@ def llm_add(
|
|
952
1064
|
help="The type of the model",
|
953
1065
|
click_type=click.Choice(sorted(list(MODEL_TYPE_CONSTRUCTORS.keys()))),
|
954
1066
|
),
|
955
|
-
] = "
|
1067
|
+
] = "Azure",
|
956
1068
|
):
|
957
1069
|
if not MODEL_CONFIG_DIR.exists():
|
958
1070
|
MODEL_CONFIG_DIR.mkdir(parents=True)
|
@@ -996,6 +1108,7 @@ def llm_add(
|
|
996
1108
|
"model_cost": {"input": in_cost, "output": out_cost},
|
997
1109
|
}
|
998
1110
|
elif model_type == "OpenAI":
|
1111
|
+
print("DEPRECATED: Use 'Azure' instead. CTRL+C to exit.")
|
999
1112
|
model_id = typer.prompt(
|
1000
1113
|
"Enter the model ID (list model IDs with `janus llm ls -a`)",
|
1001
1114
|
default="gpt-4o",
|
@@ -1017,6 +1130,28 @@ def llm_add(
|
|
1017
1130
|
"token_limit": max_tokens,
|
1018
1131
|
"model_cost": model_cost,
|
1019
1132
|
}
|
1133
|
+
elif model_type == "Azure":
|
1134
|
+
model_id = typer.prompt(
|
1135
|
+
"Enter the model ID (list model IDs with `janus llm ls -a`)",
|
1136
|
+
default="gpt-4o",
|
1137
|
+
type=click.Choice(azure_models),
|
1138
|
+
show_choices=False,
|
1139
|
+
)
|
1140
|
+
params = dict(
|
1141
|
+
# Azure uses the "azure_deployment" key for what we're calling "long_model_id"
|
1142
|
+
azure_deployment=MODEL_ID_TO_LONG_ID[model_id],
|
1143
|
+
temperature=0.7,
|
1144
|
+
n=1,
|
1145
|
+
)
|
1146
|
+
max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
|
1147
|
+
model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
|
1148
|
+
cfg = {
|
1149
|
+
"model_type": model_type,
|
1150
|
+
"model_id": model_id,
|
1151
|
+
"model_args": params,
|
1152
|
+
"token_limit": max_tokens,
|
1153
|
+
"model_cost": model_cost,
|
1154
|
+
}
|
1020
1155
|
elif model_type == "BedrockChat" or model_type == "Bedrock":
|
1021
1156
|
model_id = typer.prompt(
|
1022
1157
|
"Enter the model ID (list model IDs with `janus llm ls -a`)",
|
janus/converter/__init__.py
CHANGED
@@ -2,5 +2,6 @@ from janus.converter.converter import Converter
|
|
2
2
|
from janus.converter.diagram import DiagramGenerator
|
3
3
|
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
4
4
|
from janus.converter.evaluate import Evaluator
|
5
|
+
from janus.converter.partition import Partitioner
|
5
6
|
from janus.converter.requirements import RequirementsDocumenter
|
6
7
|
from janus.converter.translate import Translator
|
janus/converter/_tests/test_translate.py
CHANGED
@@ -90,14 +90,14 @@ class TestDiagramGenerator(unittest.TestCase):
|
|
90
90
|
def setUp(self):
|
91
91
|
"""Set up the tests."""
|
92
92
|
self.diagram_generator = DiagramGenerator(
|
93
|
-
model="gpt-4o",
|
93
|
+
model="gpt-4o-mini",
|
94
94
|
source_language="fortran",
|
95
95
|
diagram_type="Activity",
|
96
96
|
)
|
97
97
|
|
98
98
|
def test_init(self):
|
99
99
|
"""Test __init__ method."""
|
100
|
-
self.assertEqual(self.diagram_generator._model_name, "gpt-4o")
|
100
|
+
self.assertEqual(self.diagram_generator._model_name, "gpt-4o-mini")
|
101
101
|
self.assertEqual(self.diagram_generator._source_language, "fortran")
|
102
102
|
self.assertEqual(self.diagram_generator._diagram_type, "Activity")
|
103
103
|
|
janus/converter/converter.py
CHANGED
@@ -6,7 +6,12 @@ from typing import Any
|
|
6
6
|
|
7
7
|
from langchain_core.exceptions import OutputParserException
|
8
8
|
from langchain_core.prompts import ChatPromptTemplate
|
9
|
-
from langchain_core.runnables import
|
9
|
+
from langchain_core.runnables import (
|
10
|
+
Runnable,
|
11
|
+
RunnableLambda,
|
12
|
+
RunnableParallel,
|
13
|
+
RunnablePassthrough,
|
14
|
+
)
|
10
15
|
from openai import BadRequestError, RateLimitError
|
11
16
|
from pydantic import ValidationError
|
12
17
|
|
@@ -23,15 +28,14 @@ from janus.language.splitter import (
|
|
23
28
|
from janus.llm.model_callbacks import get_model_callback
|
24
29
|
from janus.llm.models_info import MODEL_PROMPT_ENGINES, JanusModel, load_model
|
25
30
|
from janus.parsers.parser import GenericParser, JanusParser
|
26
|
-
from janus.refiners.refiner import (
|
27
|
-
FixParserExceptions,
|
28
|
-
HallucinationRefiner,
|
29
|
-
JanusRefiner,
|
30
|
-
ReflectionRefiner,
|
31
|
-
)
|
31
|
+
from janus.refiners.refiner import JanusRefiner
|
32
32
|
|
33
33
|
# from janus.refiners.refiner import BasicRefiner, Refiner
|
34
|
-
from janus.retrievers.retriever import
|
34
|
+
from janus.retrievers.retriever import (
|
35
|
+
ActiveUsingsRetriever,
|
36
|
+
JanusRetriever,
|
37
|
+
LanguageDocsRetriever,
|
38
|
+
)
|
35
39
|
from janus.utils.enums import LANGUAGES
|
36
40
|
from janus.utils.logger import create_logger
|
37
41
|
|
@@ -78,7 +82,7 @@ class Converter:
|
|
78
82
|
protected_node_types: tuple[str, ...] = (),
|
79
83
|
prune_node_types: tuple[str, ...] = (),
|
80
84
|
splitter_type: str = "file",
|
81
|
-
|
85
|
+
refiner_types: list[type[JanusRefiner]] = [JanusRefiner],
|
82
86
|
retriever_type: str | None = None,
|
83
87
|
) -> None:
|
84
88
|
"""Initialize a Converter instance.
|
@@ -105,6 +109,7 @@ class Converter:
|
|
105
109
|
- None
|
106
110
|
retriever_type: The type of retriever to use. Valid values:
|
107
111
|
- "active_usings"
|
112
|
+
- "language_docs"
|
108
113
|
- None
|
109
114
|
"""
|
110
115
|
self._changed_attrs: set = set()
|
@@ -133,10 +138,11 @@ class Converter:
|
|
133
138
|
self._prompt: ChatPromptTemplate
|
134
139
|
|
135
140
|
self._parser: JanusParser = GenericParser()
|
141
|
+
self._base_parser: JanusParser = GenericParser()
|
136
142
|
self._combiner: Combiner = Combiner()
|
137
143
|
|
138
144
|
self._splitter_type: str
|
139
|
-
self.
|
145
|
+
self._refiner_types: list[type[JanusRefiner]]
|
140
146
|
self._retriever_type: str | None
|
141
147
|
|
142
148
|
self._splitter: Splitter
|
@@ -144,7 +150,7 @@ class Converter:
|
|
144
150
|
self._retriever: JanusRetriever
|
145
151
|
|
146
152
|
self.set_splitter(splitter_type=splitter_type)
|
147
|
-
self.
|
153
|
+
self.set_refiner_types(refiner_types=refiner_types)
|
148
154
|
self.set_retriever(retriever_type=retriever_type)
|
149
155
|
self.set_model(model_name=model, **model_arguments)
|
150
156
|
self.set_prompt(prompt_template=prompt_template)
|
@@ -170,7 +176,7 @@ class Converter:
|
|
170
176
|
self._load_model()
|
171
177
|
self._load_prompt()
|
172
178
|
self._load_retriever()
|
173
|
-
self.
|
179
|
+
self._load_refiner_chain()
|
174
180
|
self._load_splitter()
|
175
181
|
self._load_vectorizer()
|
176
182
|
self._load_chain()
|
@@ -210,13 +216,13 @@ class Converter:
|
|
210
216
|
|
211
217
|
self._splitter_type = splitter_type
|
212
218
|
|
213
|
-
def
|
219
|
+
def set_refiner_types(self, refiner_types: list[type[JanusRefiner]]) -> None:
|
214
220
|
"""Validate and set the refiner type
|
215
221
|
|
216
222
|
Arguments:
|
217
223
|
refiner_type: the type of refiner to use
|
218
224
|
"""
|
219
|
-
self.
|
225
|
+
self._refiner_types = refiner_types
|
220
226
|
|
221
227
|
def set_retriever(self, retriever_type: str | None) -> None:
|
222
228
|
"""Validate and set the retriever type
|
@@ -355,48 +361,40 @@ class Converter:
|
|
355
361
|
def _load_retriever(self):
|
356
362
|
if self._retriever_type == "active_usings":
|
357
363
|
self._retriever = ActiveUsingsRetriever()
|
364
|
+
elif self._retriever_type == "language_docs":
|
365
|
+
self._retriever = LanguageDocsRetriever(self._llm, self._source_language)
|
358
366
|
else:
|
359
367
|
self._retriever = JanusRetriever()
|
360
368
|
|
361
|
-
@run_if_changed("
|
362
|
-
def
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
max_retries=self.max_prompts,
|
369
|
+
@run_if_changed("_refiner_types", "_model_name", "max_prompts", "_parser")
|
370
|
+
def _load_refiner_chain(self) -> None:
|
371
|
+
self._refiner_chain = RunnableParallel(
|
372
|
+
completion=self._llm,
|
373
|
+
prompt_value=RunnablePassthrough(),
|
374
|
+
)
|
375
|
+
for refiner_type in self._refiner_types[:-1]:
|
376
|
+
# NOTE: Do NOT remove refiner_type=refiner_type from lambda.
|
377
|
+
# Due to lambda capture, must be present or chain will not
|
378
|
+
# be correctly constructed.
|
379
|
+
self._refiner_chain = self._refiner_chain | RunnableParallel(
|
380
|
+
completion=lambda x, refiner_type=refiner_type: refiner_type(
|
381
|
+
llm=self._llm,
|
382
|
+
parser=self._base_parser,
|
383
|
+
max_retries=self.max_prompts,
|
384
|
+
).parse_completion(**x),
|
385
|
+
prompt_value=lambda x: x["prompt_value"],
|
379
386
|
)
|
380
|
-
|
381
|
-
self.
|
387
|
+
self._refiner_chain = self._refiner_chain | RunnableLambda(
|
388
|
+
lambda x: self._refiner_types[-1](
|
382
389
|
llm=self._llm,
|
383
390
|
parser=self._parser,
|
384
391
|
max_retries=self.max_prompts,
|
385
|
-
)
|
386
|
-
|
387
|
-
self._refiner = JanusRefiner(parser=self._parser)
|
392
|
+
).parse_completion(**x)
|
393
|
+
)
|
388
394
|
|
389
|
-
@run_if_changed("_parser", "_retriever", "_prompt", "_llm", "
|
395
|
+
@run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner_chain")
|
390
396
|
def _load_chain(self):
|
391
|
-
self.chain = (
|
392
|
-
self._input_runnable()
|
393
|
-
| self._prompt
|
394
|
-
| RunnableParallel(
|
395
|
-
completion=self._llm,
|
396
|
-
prompt_value=RunnablePassthrough(),
|
397
|
-
)
|
398
|
-
| self._refiner.parse_runnable
|
399
|
-
)
|
397
|
+
self.chain = self._input_runnable() | self._prompt | self._refiner_chain
|
400
398
|
|
401
399
|
def _input_runnable(self) -> Runnable:
|
402
400
|
return RunnableParallel(
|
janus/converter/partition.py
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
from janus.converter.converter import Converter
|
4
|
+
from janus.language.block import TranslatedCodeBlock
|
5
|
+
from janus.parsers.partition_parser import PartitionParser
|
6
|
+
from janus.utils.logger import create_logger
|
7
|
+
|
8
|
+
log = create_logger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
class Partitioner(Converter):
|
12
|
+
def __init__(self, partition_token_limit: int, **kwargs):
|
13
|
+
super().__init__(**kwargs)
|
14
|
+
self.set_prompt("partition")
|
15
|
+
self._load_model()
|
16
|
+
self._parser = PartitionParser(
|
17
|
+
token_limit=partition_token_limit,
|
18
|
+
model=self._llm,
|
19
|
+
)
|
20
|
+
self._target_language = self._source_language
|
21
|
+
self._target_suffix = self._source_suffix
|
22
|
+
self._load_parameters()
|
23
|
+
|
24
|
+
def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
|
25
|
+
output_str = self._parser.parse_combined_output(block.complete_text)
|
26
|
+
out_path.parent.mkdir(parents=True, exist_ok=True)
|
27
|
+
out_path.write_text(output_str, encoding="utf-8")
|
janus/language/combine.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
import re
|
2
|
+
|
1
3
|
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
2
4
|
from janus.language.file import FileManager
|
3
5
|
from janus.utils.logger import create_logger
|
@@ -90,3 +92,23 @@ class ChunkCombiner(Combiner):
|
|
90
92
|
root: The functional code block to combine with its children.
|
91
93
|
"""
|
92
94
|
return root
|
95
|
+
|
96
|
+
|
97
|
+
class PartitionCombiner(Combiner):
|
98
|
+
@staticmethod
|
99
|
+
def combine(root: CodeBlock) -> None:
|
100
|
+
"""A combiner which inserts partition tags between code blocks"""
|
101
|
+
queue = [root]
|
102
|
+
while queue:
|
103
|
+
block = queue.pop(0)
|
104
|
+
if block.children:
|
105
|
+
queue.extend(block.children)
|
106
|
+
else:
|
107
|
+
block.affixes = (block.prefix, block.suffix + "\n<JANUS_PARTITION>\n")
|
108
|
+
|
109
|
+
super(PartitionCombiner, PartitionCombiner).combine(root)
|
110
|
+
root.text = re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.text)
|
111
|
+
root.affixes = (
|
112
|
+
root.prefix,
|
113
|
+
re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.suffix),
|
114
|
+
)
|
janus/llm/model_callbacks.py
CHANGED
@@ -23,6 +23,11 @@ openai_model_reroutes = {
|
|
23
23
|
"gpt-3.5-turbo-16k-0613": "gpt-3.5-turbo-0125",
|
24
24
|
}
|
25
25
|
|
26
|
+
azure_model_reroutes = {
|
27
|
+
"gpt-4o": "gpt-4o-2024-08-06",
|
28
|
+
"gpt-4o-mini": "gpt-4o-mini",
|
29
|
+
"gpt-3.5-turbo-16k": "gpt35-turbo-16k",
|
30
|
+
}
|
26
31
|
|
27
32
|
# Updated 2024-06-21
|
28
33
|
COST_PER_1K_TOKENS: dict[str, dict[str, float]] = {
|
@@ -31,6 +36,10 @@ COST_PER_1K_TOKENS: dict[str, dict[str, float]] = {
|
|
31
36
|
"gpt-4-0125-preview": {"input": 0.01, "output": 0.03},
|
32
37
|
"gpt-4-0613": {"input": 0.03, "output": 0.06},
|
33
38
|
"gpt-4o-2024-05-13": {"input": 0.005, "output": 0.015},
|
39
|
+
"gpt-4o-2024-08-06": {"input": 0.00275, "output": 0.011},
|
40
|
+
"gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
|
41
|
+
"gpt35-turbo-16k": {"input": 0.003, "output": 0.004},
|
42
|
+
"gpt-35-turbo-16k": {"input": 0.003, "output": 0.004},
|
34
43
|
"anthropic.claude-v2": {"input": 0.008, "output": 0.024},
|
35
44
|
"anthropic.claude-instant-v1": {"input": 0.0008, "output": 0.0024},
|
36
45
|
"anthropic.claude-3-haiku-20240307-v1:0": {"input": 0.00025, "output": 0.00125},
|