janus-llm 4.0.0__py3-none-any.whl → 4.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/cli.py +161 -26
- janus/converter/__init__.py +1 -0
- janus/converter/_tests/test_translate.py +2 -2
- janus/converter/converter.py +45 -47
- janus/converter/partition.py +27 -0
- janus/language/combine.py +22 -0
- janus/llm/model_callbacks.py +9 -0
- janus/llm/models_info.py +41 -17
- janus/parsers/partition_parser.py +136 -0
- janus/refiners/refiner.py +8 -12
- janus/refiners/uml.py +33 -0
- janus/retrievers/retriever.py +60 -0
- janus/utils/pdf_docs_reader.py +134 -0
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/METADATA +9 -1
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/RECORD +19 -15
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/WHEEL +1 -1
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/LICENSE +0 -0
- {janus_llm-4.0.0.dist-info → janus_llm-4.2.0.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
|
|
5
5
|
from janus.converter.translate import Translator
|
6
6
|
from janus.metrics import * # noqa: F403
|
7
7
|
|
8
|
-
__version__ = "4.0.0"
|
8
|
+
__version__ = "4.2.0"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/cli.py
CHANGED
@@ -13,10 +13,13 @@ from rich.console import Console
|
|
13
13
|
from rich.prompt import Confirm
|
14
14
|
from typing_extensions import Annotated
|
15
15
|
|
16
|
+
import janus.refiners.refiner
|
17
|
+
import janus.refiners.uml
|
16
18
|
from janus.converter.aggregator import Aggregator
|
17
19
|
from janus.converter.converter import Converter
|
18
20
|
from janus.converter.diagram import DiagramGenerator
|
19
21
|
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
22
|
+
from janus.converter.partition import Partitioner
|
20
23
|
from janus.converter.requirements import RequirementsDocumenter
|
21
24
|
from janus.converter.translate import Translator
|
22
25
|
from janus.embedding.collections import Collections
|
@@ -39,11 +42,11 @@ from janus.llm.models_info import (
|
|
39
42
|
MODEL_TYPE_CONSTRUCTORS,
|
40
43
|
MODEL_TYPES,
|
41
44
|
TOKEN_LIMITS,
|
45
|
+
azure_models,
|
42
46
|
bedrock_models,
|
43
47
|
openai_models,
|
44
48
|
)
|
45
49
|
from janus.metrics.cli import evaluate
|
46
|
-
from janus.refiners.refiner import REFINERS
|
47
50
|
from janus.utils.enums import LANGUAGES
|
48
51
|
from janus.utils.logger import create_logger
|
49
52
|
|
@@ -68,6 +71,18 @@ with open(db_file, "r") as f:
|
|
68
71
|
collections_config_file = Path(db_loc) / "collections.json"
|
69
72
|
|
70
73
|
|
74
|
+
def get_subclasses(cls):
|
75
|
+
return set(cls.__subclasses__()).union(
|
76
|
+
set(s for c in cls.__subclasses__() for s in get_subclasses(c))
|
77
|
+
)
|
78
|
+
|
79
|
+
|
80
|
+
REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
|
81
|
+
{janus.refiners.refiner.JanusRefiner}
|
82
|
+
)
|
83
|
+
REFINERS = {r.__name__: r for r in REFINER_TYPES}
|
84
|
+
|
85
|
+
|
71
86
|
def get_collections_config():
|
72
87
|
if collections_config_file.exists():
|
73
88
|
with open(collections_config_file, "r") as f:
|
@@ -243,22 +258,23 @@ def translate(
|
|
243
258
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
244
259
|
),
|
245
260
|
] = "file",
|
246
|
-
|
247
|
-
str,
|
261
|
+
refiner_types: Annotated[
|
262
|
+
list[str],
|
248
263
|
typer.Option(
|
249
264
|
"-r",
|
250
265
|
"--refiner",
|
251
|
-
help="
|
266
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
267
|
+
refinement chain",
|
252
268
|
click_type=click.Choice(list(REFINERS.keys())),
|
253
269
|
),
|
254
|
-
] = "
|
270
|
+
] = ["JanusRefiner"],
|
255
271
|
retriever_type: Annotated[
|
256
272
|
str,
|
257
273
|
typer.Option(
|
258
274
|
"-R",
|
259
275
|
"--retriever",
|
260
276
|
help="Name of custom retriever to use",
|
261
|
-
click_type=click.Choice(["active_usings"]),
|
277
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
262
278
|
),
|
263
279
|
] = None,
|
264
280
|
max_tokens: Annotated[
|
@@ -271,6 +287,7 @@ def translate(
|
|
271
287
|
),
|
272
288
|
] = None,
|
273
289
|
):
|
290
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
274
291
|
try:
|
275
292
|
target_language, target_version = target_lang.split("-")
|
276
293
|
except ValueError:
|
@@ -295,7 +312,7 @@ def translate(
|
|
295
312
|
db_path=db_loc,
|
296
313
|
db_config=collections_config,
|
297
314
|
splitter_type=splitter_type,
|
298
|
-
|
315
|
+
refiner_types=refiner_types,
|
299
316
|
retriever_type=retriever_type,
|
300
317
|
)
|
301
318
|
translator.translate(input_dir, output_dir, overwrite, collection)
|
@@ -401,22 +418,23 @@ def document(
|
|
401
418
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
402
419
|
),
|
403
420
|
] = "file",
|
404
|
-
|
405
|
-
str,
|
421
|
+
refiner_types: Annotated[
|
422
|
+
list[str],
|
406
423
|
typer.Option(
|
407
424
|
"-r",
|
408
425
|
"--refiner",
|
409
|
-
help="
|
426
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
427
|
+
refinement chain",
|
410
428
|
click_type=click.Choice(list(REFINERS.keys())),
|
411
429
|
),
|
412
|
-
] = "
|
430
|
+
] = ["JanusRefiner"],
|
413
431
|
retriever_type: Annotated[
|
414
432
|
str,
|
415
433
|
typer.Option(
|
416
434
|
"-R",
|
417
435
|
"--retriever",
|
418
436
|
help="Name of custom retriever to use",
|
419
|
-
click_type=click.Choice(["active_usings"]),
|
437
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
420
438
|
),
|
421
439
|
] = None,
|
422
440
|
max_tokens: Annotated[
|
@@ -429,6 +447,7 @@ def document(
|
|
429
447
|
),
|
430
448
|
] = None,
|
431
449
|
):
|
450
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
432
451
|
model_arguments = dict(temperature=temperature)
|
433
452
|
collections_config = get_collections_config()
|
434
453
|
kwargs = dict(
|
@@ -440,7 +459,7 @@ def document(
|
|
440
459
|
db_path=db_loc,
|
441
460
|
db_config=collections_config,
|
442
461
|
splitter_type=splitter_type,
|
443
|
-
|
462
|
+
refiner_types=refiner_types,
|
444
463
|
retriever_type=retriever_type,
|
445
464
|
)
|
446
465
|
if doc_mode == "madlibs":
|
@@ -457,12 +476,6 @@ def document(
|
|
457
476
|
documenter.translate(input_dir, output_dir, overwrite, collection)
|
458
477
|
|
459
478
|
|
460
|
-
def get_subclasses(cls):
|
461
|
-
return set(cls.__subclasses__()).union(
|
462
|
-
set(s for c in cls.__subclasses__() for s in get_subclasses(c))
|
463
|
-
)
|
464
|
-
|
465
|
-
|
466
479
|
@app.command()
|
467
480
|
def aggregate(
|
468
481
|
input_dir: Annotated[
|
@@ -577,6 +590,103 @@ def aggregate(
|
|
577
590
|
aggregator.translate(input_dir, output_dir, overwrite, collection)
|
578
591
|
|
579
592
|
|
593
|
+
@app.command(
|
594
|
+
help="Partition input code using an LLM.",
|
595
|
+
no_args_is_help=True,
|
596
|
+
)
|
597
|
+
def partition(
|
598
|
+
input_dir: Annotated[
|
599
|
+
Path,
|
600
|
+
typer.Option(
|
601
|
+
"--input",
|
602
|
+
"-i",
|
603
|
+
help="The directory containing the source code to be partitioned. ",
|
604
|
+
),
|
605
|
+
],
|
606
|
+
language: Annotated[
|
607
|
+
str,
|
608
|
+
typer.Option(
|
609
|
+
"--language",
|
610
|
+
"-l",
|
611
|
+
help="The language of the source code.",
|
612
|
+
click_type=click.Choice(sorted(LANGUAGES)),
|
613
|
+
),
|
614
|
+
],
|
615
|
+
output_dir: Annotated[
|
616
|
+
Path,
|
617
|
+
typer.Option(
|
618
|
+
"--output-dir", "-o", help="The directory to store the partitioned code in."
|
619
|
+
),
|
620
|
+
],
|
621
|
+
llm_name: Annotated[
|
622
|
+
str,
|
623
|
+
typer.Option(
|
624
|
+
"--llm",
|
625
|
+
"-L",
|
626
|
+
help="The custom name of the model set with 'janus llm add'.",
|
627
|
+
),
|
628
|
+
] = "gpt-4o",
|
629
|
+
max_prompts: Annotated[
|
630
|
+
int,
|
631
|
+
typer.Option(
|
632
|
+
"--max-prompts",
|
633
|
+
"-m",
|
634
|
+
help="The maximum number of times to prompt a model on one functional block "
|
635
|
+
"before exiting the application. This is to prevent wasting too much money.",
|
636
|
+
),
|
637
|
+
] = 10,
|
638
|
+
overwrite: Annotated[
|
639
|
+
bool,
|
640
|
+
typer.Option(
|
641
|
+
"--overwrite/--preserve",
|
642
|
+
help="Whether to overwrite existing files in the output directory",
|
643
|
+
),
|
644
|
+
] = False,
|
645
|
+
temperature: Annotated[
|
646
|
+
float,
|
647
|
+
typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
|
648
|
+
] = 0.7,
|
649
|
+
splitter_type: Annotated[
|
650
|
+
str,
|
651
|
+
typer.Option(
|
652
|
+
"-S",
|
653
|
+
"--splitter",
|
654
|
+
help="Name of custom splitter to use",
|
655
|
+
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
656
|
+
),
|
657
|
+
] = "file",
|
658
|
+
max_tokens: Annotated[
|
659
|
+
int,
|
660
|
+
typer.Option(
|
661
|
+
"--max-tokens",
|
662
|
+
"-M",
|
663
|
+
help="The maximum number of tokens the model will take in. "
|
664
|
+
"If unspecificed, model's default max will be used.",
|
665
|
+
),
|
666
|
+
] = None,
|
667
|
+
partition_token_limit: Annotated[
|
668
|
+
int,
|
669
|
+
typer.Option(
|
670
|
+
"--partition-tokens",
|
671
|
+
"-pt",
|
672
|
+
help="The limit on the number of tokens per partition.",
|
673
|
+
),
|
674
|
+
] = 8192,
|
675
|
+
):
|
676
|
+
model_arguments = dict(temperature=temperature)
|
677
|
+
kwargs = dict(
|
678
|
+
model=llm_name,
|
679
|
+
model_arguments=model_arguments,
|
680
|
+
source_language=language,
|
681
|
+
max_prompts=max_prompts,
|
682
|
+
max_tokens=max_tokens,
|
683
|
+
splitter_type=splitter_type,
|
684
|
+
partition_token_limit=partition_token_limit,
|
685
|
+
)
|
686
|
+
partitioner = Partitioner(**kwargs)
|
687
|
+
partitioner.translate(input_dir, output_dir, overwrite)
|
688
|
+
|
689
|
+
|
580
690
|
@app.command(
|
581
691
|
help="Diagram input code using an LLM.",
|
582
692
|
no_args_is_help=True,
|
@@ -666,25 +776,27 @@ def diagram(
|
|
666
776
|
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
667
777
|
),
|
668
778
|
] = "file",
|
669
|
-
|
670
|
-
str,
|
779
|
+
refiner_types: Annotated[
|
780
|
+
list[str],
|
671
781
|
typer.Option(
|
672
782
|
"-r",
|
673
783
|
"--refiner",
|
674
|
-
help="
|
784
|
+
help="List of refiner types to use. Add -r for each refiner to use in\
|
785
|
+
refinement chain",
|
675
786
|
click_type=click.Choice(list(REFINERS.keys())),
|
676
787
|
),
|
677
|
-
] = "
|
788
|
+
] = ["JanusRefiner"],
|
678
789
|
retriever_type: Annotated[
|
679
790
|
str,
|
680
791
|
typer.Option(
|
681
792
|
"-R",
|
682
793
|
"--retriever",
|
683
794
|
help="Name of custom retriever to use",
|
684
|
-
click_type=click.Choice(["active_usings"]),
|
795
|
+
click_type=click.Choice(["active_usings", "language_docs"]),
|
685
796
|
),
|
686
797
|
] = None,
|
687
798
|
):
|
799
|
+
refiner_types = [REFINERS[r] for r in refiner_types]
|
688
800
|
model_arguments = dict(temperature=temperature)
|
689
801
|
collections_config = get_collections_config()
|
690
802
|
diagram_generator = DiagramGenerator(
|
@@ -695,7 +807,7 @@ def diagram(
|
|
695
807
|
db_path=db_loc,
|
696
808
|
db_config=collections_config,
|
697
809
|
splitter_type=splitter_type,
|
698
|
-
|
810
|
+
refiner_types=refiner_types,
|
699
811
|
retriever_type=retriever_type,
|
700
812
|
diagram_type=diagram_type,
|
701
813
|
add_documentation=add_documentation,
|
@@ -952,7 +1064,7 @@ def llm_add(
|
|
952
1064
|
help="The type of the model",
|
953
1065
|
click_type=click.Choice(sorted(list(MODEL_TYPE_CONSTRUCTORS.keys()))),
|
954
1066
|
),
|
955
|
-
] = "
|
1067
|
+
] = "Azure",
|
956
1068
|
):
|
957
1069
|
if not MODEL_CONFIG_DIR.exists():
|
958
1070
|
MODEL_CONFIG_DIR.mkdir(parents=True)
|
@@ -996,6 +1108,7 @@ def llm_add(
|
|
996
1108
|
"model_cost": {"input": in_cost, "output": out_cost},
|
997
1109
|
}
|
998
1110
|
elif model_type == "OpenAI":
|
1111
|
+
print("DEPRECATED: Use 'Azure' instead. CTRL+C to exit.")
|
999
1112
|
model_id = typer.prompt(
|
1000
1113
|
"Enter the model ID (list model IDs with `janus llm ls -a`)",
|
1001
1114
|
default="gpt-4o",
|
@@ -1017,6 +1130,28 @@ def llm_add(
|
|
1017
1130
|
"token_limit": max_tokens,
|
1018
1131
|
"model_cost": model_cost,
|
1019
1132
|
}
|
1133
|
+
elif model_type == "Azure":
|
1134
|
+
model_id = typer.prompt(
|
1135
|
+
"Enter the model ID (list model IDs with `janus llm ls -a`)",
|
1136
|
+
default="gpt-4o",
|
1137
|
+
type=click.Choice(azure_models),
|
1138
|
+
show_choices=False,
|
1139
|
+
)
|
1140
|
+
params = dict(
|
1141
|
+
# Azure uses the "azure_deployment" key for what we're calling "long_model_id"
|
1142
|
+
azure_deployment=MODEL_ID_TO_LONG_ID[model_id],
|
1143
|
+
temperature=0.7,
|
1144
|
+
n=1,
|
1145
|
+
)
|
1146
|
+
max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
|
1147
|
+
model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
|
1148
|
+
cfg = {
|
1149
|
+
"model_type": model_type,
|
1150
|
+
"model_id": model_id,
|
1151
|
+
"model_args": params,
|
1152
|
+
"token_limit": max_tokens,
|
1153
|
+
"model_cost": model_cost,
|
1154
|
+
}
|
1020
1155
|
elif model_type == "BedrockChat" or model_type == "Bedrock":
|
1021
1156
|
model_id = typer.prompt(
|
1022
1157
|
"Enter the model ID (list model IDs with `janus llm ls -a`)",
|
janus/converter/__init__.py
CHANGED
@@ -2,5 +2,6 @@ from janus.converter.converter import Converter
|
|
2
2
|
from janus.converter.diagram import DiagramGenerator
|
3
3
|
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
4
4
|
from janus.converter.evaluate import Evaluator
|
5
|
+
from janus.converter.partition import Partitioner
|
5
6
|
from janus.converter.requirements import RequirementsDocumenter
|
6
7
|
from janus.converter.translate import Translator
|
janus/converter/_tests/test_translate.py
CHANGED
@@ -90,14 +90,14 @@ class TestDiagramGenerator(unittest.TestCase):
|
|
90
90
|
def setUp(self):
|
91
91
|
"""Set up the tests."""
|
92
92
|
self.diagram_generator = DiagramGenerator(
|
93
|
-
model="gpt-4o",
|
93
|
+
model="gpt-4o-mini",
|
94
94
|
source_language="fortran",
|
95
95
|
diagram_type="Activity",
|
96
96
|
)
|
97
97
|
|
98
98
|
def test_init(self):
|
99
99
|
"""Test __init__ method."""
|
100
|
-
self.assertEqual(self.diagram_generator._model_name, "gpt-4o")
|
100
|
+
self.assertEqual(self.diagram_generator._model_name, "gpt-4o-mini")
|
101
101
|
self.assertEqual(self.diagram_generator._source_language, "fortran")
|
102
102
|
self.assertEqual(self.diagram_generator._diagram_type, "Activity")
|
103
103
|
|
janus/converter/converter.py
CHANGED
@@ -6,7 +6,12 @@ from typing import Any
|
|
6
6
|
|
7
7
|
from langchain_core.exceptions import OutputParserException
|
8
8
|
from langchain_core.prompts import ChatPromptTemplate
|
9
|
-
from langchain_core.runnables import Runnable, RunnableParallel, RunnablePassthrough
|
9
|
+
from langchain_core.runnables import (
|
10
|
+
Runnable,
|
11
|
+
RunnableLambda,
|
12
|
+
RunnableParallel,
|
13
|
+
RunnablePassthrough,
|
14
|
+
)
|
10
15
|
from openai import BadRequestError, RateLimitError
|
11
16
|
from pydantic import ValidationError
|
12
17
|
|
@@ -23,15 +28,14 @@ from janus.language.splitter import (
|
|
23
28
|
from janus.llm.model_callbacks import get_model_callback
|
24
29
|
from janus.llm.models_info import MODEL_PROMPT_ENGINES, JanusModel, load_model
|
25
30
|
from janus.parsers.parser import GenericParser, JanusParser
|
26
|
-
from janus.refiners.refiner import (
|
27
|
-
FixParserExceptions,
|
28
|
-
HallucinationRefiner,
|
29
|
-
JanusRefiner,
|
30
|
-
ReflectionRefiner,
|
31
|
-
)
|
31
|
+
from janus.refiners.refiner import JanusRefiner
|
32
32
|
|
33
33
|
# from janus.refiners.refiner import BasicRefiner, Refiner
|
34
|
-
from janus.retrievers.retriever import ActiveUsingsRetriever, JanusRetriever
|
34
|
+
from janus.retrievers.retriever import (
|
35
|
+
ActiveUsingsRetriever,
|
36
|
+
JanusRetriever,
|
37
|
+
LanguageDocsRetriever,
|
38
|
+
)
|
35
39
|
from janus.utils.enums import LANGUAGES
|
36
40
|
from janus.utils.logger import create_logger
|
37
41
|
|
@@ -78,7 +82,7 @@ class Converter:
|
|
78
82
|
protected_node_types: tuple[str, ...] = (),
|
79
83
|
prune_node_types: tuple[str, ...] = (),
|
80
84
|
splitter_type: str = "file",
|
81
|
-
|
85
|
+
refiner_types: list[type[JanusRefiner]] = [JanusRefiner],
|
82
86
|
retriever_type: str | None = None,
|
83
87
|
) -> None:
|
84
88
|
"""Initialize a Converter instance.
|
@@ -105,6 +109,7 @@ class Converter:
|
|
105
109
|
- None
|
106
110
|
retriever_type: The type of retriever to use. Valid values:
|
107
111
|
- "active_usings"
|
112
|
+
- "language_docs"
|
108
113
|
- None
|
109
114
|
"""
|
110
115
|
self._changed_attrs: set = set()
|
@@ -133,10 +138,11 @@ class Converter:
|
|
133
138
|
self._prompt: ChatPromptTemplate
|
134
139
|
|
135
140
|
self._parser: JanusParser = GenericParser()
|
141
|
+
self._base_parser: JanusParser = GenericParser()
|
136
142
|
self._combiner: Combiner = Combiner()
|
137
143
|
|
138
144
|
self._splitter_type: str
|
139
|
-
self.
|
145
|
+
self._refiner_types: list[type[JanusRefiner]]
|
140
146
|
self._retriever_type: str | None
|
141
147
|
|
142
148
|
self._splitter: Splitter
|
@@ -144,7 +150,7 @@ class Converter:
|
|
144
150
|
self._retriever: JanusRetriever
|
145
151
|
|
146
152
|
self.set_splitter(splitter_type=splitter_type)
|
147
|
-
self.
|
153
|
+
self.set_refiner_types(refiner_types=refiner_types)
|
148
154
|
self.set_retriever(retriever_type=retriever_type)
|
149
155
|
self.set_model(model_name=model, **model_arguments)
|
150
156
|
self.set_prompt(prompt_template=prompt_template)
|
@@ -170,7 +176,7 @@ class Converter:
|
|
170
176
|
self._load_model()
|
171
177
|
self._load_prompt()
|
172
178
|
self._load_retriever()
|
173
|
-
self.
|
179
|
+
self._load_refiner_chain()
|
174
180
|
self._load_splitter()
|
175
181
|
self._load_vectorizer()
|
176
182
|
self._load_chain()
|
@@ -210,13 +216,13 @@ class Converter:
|
|
210
216
|
|
211
217
|
self._splitter_type = splitter_type
|
212
218
|
|
213
|
-
def
|
219
|
+
def set_refiner_types(self, refiner_types: list[type[JanusRefiner]]) -> None:
|
214
220
|
"""Validate and set the refiner type
|
215
221
|
|
216
222
|
Arguments:
|
217
223
|
refiner_type: the type of refiner to use
|
218
224
|
"""
|
219
|
-
self.
|
225
|
+
self._refiner_types = refiner_types
|
220
226
|
|
221
227
|
def set_retriever(self, retriever_type: str | None) -> None:
|
222
228
|
"""Validate and set the retriever type
|
@@ -355,48 +361,40 @@ class Converter:
|
|
355
361
|
def _load_retriever(self):
|
356
362
|
if self._retriever_type == "active_usings":
|
357
363
|
self._retriever = ActiveUsingsRetriever()
|
364
|
+
elif self._retriever_type == "language_docs":
|
365
|
+
self._retriever = LanguageDocsRetriever(self._llm, self._source_language)
|
358
366
|
else:
|
359
367
|
self._retriever = JanusRetriever()
|
360
368
|
|
361
|
-
@run_if_changed("
|
362
|
-
def
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
max_retries=self.max_prompts,
|
369
|
+
@run_if_changed("_refiner_types", "_model_name", "max_prompts", "_parser")
|
370
|
+
def _load_refiner_chain(self) -> None:
|
371
|
+
self._refiner_chain = RunnableParallel(
|
372
|
+
completion=self._llm,
|
373
|
+
prompt_value=RunnablePassthrough(),
|
374
|
+
)
|
375
|
+
for refiner_type in self._refiner_types[:-1]:
|
376
|
+
# NOTE: Do NOT remove refiner_type=refiner_type from lambda.
|
377
|
+
# Due to lambda capture, must be present or chain will not
|
378
|
+
# be correctly constructed.
|
379
|
+
self._refiner_chain = self._refiner_chain | RunnableParallel(
|
380
|
+
completion=lambda x, refiner_type=refiner_type: refiner_type(
|
381
|
+
llm=self._llm,
|
382
|
+
parser=self._base_parser,
|
383
|
+
max_retries=self.max_prompts,
|
384
|
+
).parse_completion(**x),
|
385
|
+
prompt_value=lambda x: x["prompt_value"],
|
379
386
|
)
|
380
|
-
|
381
|
-
self.
|
387
|
+
self._refiner_chain = self._refiner_chain | RunnableLambda(
|
388
|
+
lambda x: self._refiner_types[-1](
|
382
389
|
llm=self._llm,
|
383
390
|
parser=self._parser,
|
384
391
|
max_retries=self.max_prompts,
|
385
|
-
)
|
386
|
-
|
387
|
-
self._refiner = JanusRefiner(parser=self._parser)
|
392
|
+
).parse_completion(**x)
|
393
|
+
)
|
388
394
|
|
389
|
-
@run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner")
|
395
|
+
@run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner_chain")
|
390
396
|
def _load_chain(self):
|
391
|
-
self.chain = (
|
392
|
-
self._input_runnable()
|
393
|
-
| self._prompt
|
394
|
-
| RunnableParallel(
|
395
|
-
completion=self._llm,
|
396
|
-
prompt_value=RunnablePassthrough(),
|
397
|
-
)
|
398
|
-
| self._refiner.parse_runnable
|
399
|
-
)
|
397
|
+
self.chain = self._input_runnable() | self._prompt | self._refiner_chain
|
400
398
|
|
401
399
|
def _input_runnable(self) -> Runnable:
|
402
400
|
return RunnableParallel(
|
janus/converter/partition.py
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
from janus.converter.converter import Converter
|
4
|
+
from janus.language.block import TranslatedCodeBlock
|
5
|
+
from janus.parsers.partition_parser import PartitionParser
|
6
|
+
from janus.utils.logger import create_logger
|
7
|
+
|
8
|
+
log = create_logger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
class Partitioner(Converter):
|
12
|
+
def __init__(self, partition_token_limit: int, **kwargs):
|
13
|
+
super().__init__(**kwargs)
|
14
|
+
self.set_prompt("partition")
|
15
|
+
self._load_model()
|
16
|
+
self._parser = PartitionParser(
|
17
|
+
token_limit=partition_token_limit,
|
18
|
+
model=self._llm,
|
19
|
+
)
|
20
|
+
self._target_language = self._source_language
|
21
|
+
self._target_suffix = self._source_suffix
|
22
|
+
self._load_parameters()
|
23
|
+
|
24
|
+
def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
|
25
|
+
output_str = self._parser.parse_combined_output(block.complete_text)
|
26
|
+
out_path.parent.mkdir(parents=True, exist_ok=True)
|
27
|
+
out_path.write_text(output_str, encoding="utf-8")
|
janus/language/combine.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
import re
|
2
|
+
|
1
3
|
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
2
4
|
from janus.language.file import FileManager
|
3
5
|
from janus.utils.logger import create_logger
|
@@ -90,3 +92,23 @@ class ChunkCombiner(Combiner):
|
|
90
92
|
root: The functional code block to combine with its children.
|
91
93
|
"""
|
92
94
|
return root
|
95
|
+
|
96
|
+
|
97
|
+
class PartitionCombiner(Combiner):
|
98
|
+
@staticmethod
|
99
|
+
def combine(root: CodeBlock) -> None:
|
100
|
+
"""A combiner which inserts partition tags between code blocks"""
|
101
|
+
queue = [root]
|
102
|
+
while queue:
|
103
|
+
block = queue.pop(0)
|
104
|
+
if block.children:
|
105
|
+
queue.extend(block.children)
|
106
|
+
else:
|
107
|
+
block.affixes = (block.prefix, block.suffix + "\n<JANUS_PARTITION>\n")
|
108
|
+
|
109
|
+
super(PartitionCombiner, PartitionCombiner).combine(root)
|
110
|
+
root.text = re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.text)
|
111
|
+
root.affixes = (
|
112
|
+
root.prefix,
|
113
|
+
re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.suffix),
|
114
|
+
)
|
janus/llm/model_callbacks.py
CHANGED
@@ -23,6 +23,11 @@ openai_model_reroutes = {
|
|
23
23
|
"gpt-3.5-turbo-16k-0613": "gpt-3.5-turbo-0125",
|
24
24
|
}
|
25
25
|
|
26
|
+
azure_model_reroutes = {
|
27
|
+
"gpt-4o": "gpt-4o-2024-08-06",
|
28
|
+
"gpt-4o-mini": "gpt-4o-mini",
|
29
|
+
"gpt-3.5-turbo-16k": "gpt35-turbo-16k",
|
30
|
+
}
|
26
31
|
|
27
32
|
# Updated 2024-06-21
|
28
33
|
COST_PER_1K_TOKENS: dict[str, dict[str, float]] = {
|
@@ -31,6 +36,10 @@ COST_PER_1K_TOKENS: dict[str, dict[str, float]] = {
|
|
31
36
|
"gpt-4-0125-preview": {"input": 0.01, "output": 0.03},
|
32
37
|
"gpt-4-0613": {"input": 0.03, "output": 0.06},
|
33
38
|
"gpt-4o-2024-05-13": {"input": 0.005, "output": 0.015},
|
39
|
+
"gpt-4o-2024-08-06": {"input": 0.00275, "output": 0.011},
|
40
|
+
"gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
|
41
|
+
"gpt35-turbo-16k": {"input": 0.003, "output": 0.004},
|
42
|
+
"gpt-35-turbo-16k": {"input": 0.003, "output": 0.004},
|
34
43
|
"anthropic.claude-v2": {"input": 0.008, "output": 0.024},
|
35
44
|
"anthropic.claude-instant-v1": {"input": 0.0008, "output": 0.0024},
|
36
45
|
"anthropic.claude-3-haiku-20240307-v1:0": {"input": 0.00025, "output": 0.00125},
|