janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/__main__.py +1 -1
- janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
- janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
- janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
- janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
- janus/_tests/test_cli.py +3 -2
- janus/cli/aggregate.py +135 -0
- janus/cli/cli.py +117 -0
- janus/cli/constants.py +49 -0
- janus/cli/database.py +289 -0
- janus/cli/diagram.py +207 -0
- janus/cli/document.py +183 -0
- janus/cli/embedding.py +122 -0
- janus/cli/llm.py +191 -0
- janus/cli/partition.py +134 -0
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +147 -0
- janus/cli/translate.py +192 -0
- janus/converter/__init__.py +1 -1
- janus/converter/_tests/test_translate.py +7 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +444 -153
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +27 -16
- janus/converter/evaluate.py +143 -144
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +3 -59
- janus/embedding/collections.py +1 -1
- janus/language/alc/_tests/alc.asm +3779 -0
- janus/language/binary/_tests/hello.bin +0 -0
- janus/language/block.py +78 -14
- janus/language/file.py +1 -1
- janus/language/mumps/_tests/mumps.m +235 -0
- janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
- janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
- janus/language/treesitter/_tests/languages/matlab.m +225 -0
- janus/llm/models_info.py +9 -1
- janus/metrics/_tests/asm_test_file.asm +10 -0
- janus/metrics/_tests/mumps_test_file.m +6 -0
- janus/metrics/_tests/test_treesitter_metrics.py +1 -1
- janus/metrics/metric.py +47 -124
- janus/metrics/prompts/clarity.txt +8 -0
- janus/metrics/prompts/completeness.txt +16 -0
- janus/metrics/prompts/faithfulness.txt +10 -0
- janus/metrics/prompts/hallucination.txt +16 -0
- janus/metrics/prompts/quality.txt +8 -0
- janus/metrics/prompts/readability.txt +16 -0
- janus/metrics/prompts/usefulness.txt +16 -0
- janus/parsers/code_parser.py +4 -4
- janus/parsers/doc_parser.py +12 -9
- janus/parsers/parser.py +7 -0
- janus/parsers/partition_parser.py +6 -4
- janus/parsers/reqs_parser.py +11 -8
- janus/parsers/uml.py +5 -4
- janus/prompts/prompt.py +2 -2
- janus/prompts/templates/README.md +30 -0
- janus/prompts/templates/basic_aggregation/human.txt +6 -0
- janus/prompts/templates/basic_aggregation/system.txt +1 -0
- janus/prompts/templates/basic_refinement/human.txt +14 -0
- janus/prompts/templates/basic_refinement/system.txt +1 -0
- janus/prompts/templates/diagram/human.txt +9 -0
- janus/prompts/templates/diagram/system.txt +1 -0
- janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
- janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
- janus/prompts/templates/document/human.txt +10 -0
- janus/prompts/templates/document/system.txt +1 -0
- janus/prompts/templates/document_cloze/human.txt +11 -0
- janus/prompts/templates/document_cloze/system.txt +1 -0
- janus/prompts/templates/document_cloze/variables.json +4 -0
- janus/prompts/templates/document_cloze/variables_asm.json +4 -0
- janus/prompts/templates/document_inline/human.txt +13 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
- janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
- janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
- janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
- janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
- janus/prompts/templates/multidocument/human.txt +15 -0
- janus/prompts/templates/multidocument/system.txt +1 -0
- janus/prompts/templates/partition/human.txt +22 -0
- janus/prompts/templates/partition/system.txt +1 -0
- janus/prompts/templates/partition/variables.json +4 -0
- janus/prompts/templates/pseudocode/human.txt +7 -0
- janus/prompts/templates/pseudocode/system.txt +7 -0
- janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
- janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
- janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
- janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
- janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
- janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
- janus/prompts/templates/refinement/hallucination/human.txt +13 -0
- janus/prompts/templates/refinement/hallucination/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/human.txt +15 -0
- janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
- janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/system.txt +1 -0
- janus/prompts/templates/refinement/revision/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/revision/system.txt +1 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
- janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
- janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
- janus/prompts/templates/requirements/human.txt +13 -0
- janus/prompts/templates/requirements/system.txt +2 -0
- janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
- janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
- janus/prompts/templates/simple/human.txt +16 -0
- janus/prompts/templates/simple/system.txt +3 -0
- janus/refiners/format.py +49 -0
- janus/refiners/refiner.py +113 -4
- janus/utils/enums.py +127 -112
- janus/utils/logger.py +2 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
- janus_llm-4.4.5.dist-info/RECORD +210 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
- janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
- janus/cli.py +0 -1488
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- janus_llm-4.3.1.dist-info/RECORD +0 -115
- janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/cli/embedding.py
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
import click
|
2
|
+
import typer
|
3
|
+
from typing_extensions import Annotated
|
4
|
+
|
5
|
+
from janus.embedding.embedding_models_info import EmbeddingModelType
|
6
|
+
|
7
|
+
# Typer sub-application that groups the embedding-model commands defined in
# this module; "-h" is accepted as an alias for "--help".
embedding = typer.Typer(
    context_settings={"help_option_names": ["-h", "--help"]},
    no_args_is_help=True,
    add_completion=False,
    help="Embedding model commands",
)
|
13
|
+
|
14
|
+
|
15
|
+
@embedding.command("add", help="Add an embedding model config to janus")
def embedding_add(
    model_name: Annotated[
        str, typer.Argument(help="The user's custom name for the model")
    ],
    model_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="The type of the model",
            click_type=click.Choice([val.value for val in EmbeddingModelType]),
        ),
    ] = "OpenAI",
):
    """Interactively build an embedding model config and write it as JSON.

    The config is saved to ``EMBEDDING_MODEL_CONFIG_DIR / f"{model_name}.json"``.
    Which values are prompted for depends on ``model_type``.

    Args:
        model_name: The user's custom name for the model; used as the file stem
            of the written config.
        model_type: One of the ``EmbeddingModelType`` values.

    Raises:
        ValueError: If ``model_type`` matches none of the handled types.
    """
    import json
    from pathlib import Path

    from pydantic import AnyHttpUrl

    from janus.embedding.embedding_models_info import (
        EMBEDDING_COST_PER_MODEL,
        EMBEDDING_MODEL_CONFIG_DIR,
        EMBEDDING_TOKEN_LIMITS,
    )

    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    EMBEDDING_MODEL_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    model_cfg = EMBEDDING_MODEL_CONFIG_DIR / f"{model_name}.json"

    if model_type in EmbeddingModelType.HuggingFaceInferenceAPI.values:
        hf = typer.style("HuggingFaceInferenceAPI", fg="yellow")
        # value_proc passes the typed answer through AnyHttpUrl.
        url = typer.prompt(f"Enter the {hf} model's URL", type=str, value_proc=AnyHttpUrl)
        api_model_name = typer.prompt("Enter the model's name", type=str, default="")
        api_key = typer.prompt("Enter the API key", type=str, default="")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=8191, type=int
        )
        in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
        out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
        cfg = {
            "model_type": model_type,
            "model_identifier": str(url),
            "model_args": {"model_name": api_model_name, "api_key": api_key},
            "token_limit": max_tokens,
            "model_cost": {"input": in_cost, "output": out_cost},
        }
    elif model_type in EmbeddingModelType.HuggingFaceLocal.values:
        hf = typer.style("HuggingFace", fg="yellow")
        model_id = typer.prompt(
            f"Enter the {hf} model ID",
            default="sentence-transformers/all-MiniLM-L6-v2",
            type=str,
        )
        cache_folder = str(
            Path(
                typer.prompt(
                    "Enter the model's cache folder",
                    default=EMBEDDING_MODEL_CONFIG_DIR / "cache",
                    type=str,
                )
            )
        )
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=8191, type=int
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": model_id,
            "model_args": {"cache_folder": cache_folder},
            "token_limit": max_tokens,
            # This branch records zero cost for local models.
            "model_cost": {"input": 0, "output": 0},
        }
    elif model_type in EmbeddingModelType.OpenAI.values:
        available_models = list(EMBEDDING_COST_PER_MODEL.keys())
        open_ai = typer.style("OpenAI", fg="green")
        # Kept in its own local so the ``model_name`` CLI argument (the user's
        # alias) is not silently rebound to the provider's model name.
        openai_model_name = typer.prompt(
            f"Enter the {open_ai} model name",
            default="text-embedding-3-small",
            type=click.types.Choice(available_models),
            show_choices=False,
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": openai_model_name,
            "model_args": {"model": openai_model_name},
            "token_limit": EMBEDDING_TOKEN_LIMITS[openai_model_name],
            "model_cost": EMBEDDING_COST_PER_MODEL[openai_model_name],
        }
    else:
        raise ValueError(f"Unknown model type {model_type}")

    with open(model_cfg, "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Model config written to {model_cfg}")
|
janus/cli/llm.py
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
import click
|
2
|
+
import typer
|
3
|
+
from rich import print
|
4
|
+
from typing_extensions import Annotated
|
5
|
+
|
6
|
+
from janus.llm.models_info import MODEL_TYPE_CONSTRUCTORS
|
7
|
+
|
8
|
+
# Typer sub-application that groups the LLM configuration commands defined in
# this module; "-h" is accepted as an alias for "--help".
llm = typer.Typer(
    context_settings={"help_option_names": ["-h", "--help"]},
    no_args_is_help=True,
    add_completion=False,
    help="LLM commands",
)
|
14
|
+
|
15
|
+
|
16
|
+
@llm.command("add", help="Add a model config to janus")
def llm_add(
    model_name: Annotated[
        str, typer.Argument(help="The user's custom name of the model")
    ],
    model_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="The type of the model",
            click_type=click.Choice(sorted(MODEL_TYPE_CONSTRUCTORS.keys())),
        ),
    ] = "Azure",
):
    """Interactively build an LLM config and write it as JSON.

    The config is saved to ``MODEL_CONFIG_DIR / f"{model_name}.json"``. Which
    values are prompted for depends on ``model_type``.

    Args:
        model_name: The user's custom name (alias) for the model; used as the
            file stem of the written config. It is NOT a provider model ID.
        model_type: One of the keys of ``MODEL_TYPE_CONSTRUCTORS``.

    Raises:
        ValueError: If ``model_type`` is not handled by any branch below.
    """
    import json

    from janus.llm.models_info import (
        COST_PER_1K_TOKENS,
        MODEL_CONFIG_DIR,
        MODEL_ID_TO_LONG_ID,
        TOKEN_LIMITS,
        azure_models,
        bedrock_models,
        openai_models,
    )

    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    MODEL_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    model_cfg = MODEL_CONFIG_DIR / f"{model_name}.json"

    if model_type == "HuggingFace":
        url = typer.prompt("Enter the model's URL")
        # The context-window limit and the output limit are separate answers;
        # previously the second prompt overwrote the first and the output
        # limit was stored as "token_limit".
        token_limit = typer.prompt(
            "Enter the model's token limit", default=65536, type=int
        )
        max_new_tokens = typer.prompt(
            "Enter the model's max output tokens", default=8192, type=int
        )
        in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
        out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
        params = dict(
            inference_server_url=url,
            max_new_tokens=max_new_tokens,
            top_k=10,
            top_p=0.95,
            typical_p=0.95,
            temperature=0.01,
            repetition_penalty=1.03,
            timeout=240,
        )
        cfg = {
            "model_type": model_type,
            "model_id": "gpt-4o",  # This is a placeholder to use the Azure PromptEngine
            "model_args": params,
            "token_limit": token_limit,
            "model_cost": {"input": in_cost, "output": out_cost},
            "input_token_proportion": 0.4,
        }
    elif model_type == "HuggingFaceLocal":
        model_id = typer.prompt("Enter the model ID")
        task = typer.prompt("Enter the task")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=4096, type=int
        )
        cfg = {
            "model_type": model_type,
            "model_args": {"model_id": model_id, "task": task},
            "token_limit": max_tokens,
            # This branch records zero cost for local models.
            "model_cost": {"input": 0, "output": 0},
            "input_token_proportion": 0.4,
        }
    elif model_type == "OpenAI":
        print("DEPRECATED: Use 'Azure' instead. CTRL+C to exit.")
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="gpt-4o",
            type=click.Choice(openai_models),
            show_choices=False,
        )
        # Look the limits and costs up by the selected model ID, never by the
        # user's alias (``model_name``), which is an arbitrary string. Fall
        # back to the ID itself if it has no long-ID mapping.
        long_model_id = MODEL_ID_TO_LONG_ID.get(model_id, model_id)
        params = dict(
            model_name=long_model_id,
            temperature=0.7,
            n=1,
        )
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    elif model_type == "Azure":
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="gpt-4o",
            type=click.Choice(azure_models),
            show_choices=False,
        )
        long_model_id = MODEL_ID_TO_LONG_ID[model_id]
        params = dict(
            # Azure uses the "azure_deployment" key for what we're calling "long_model_id"
            azure_deployment=long_model_id,
            temperature=0.7,
            n=1,
        )
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    elif model_type == "BedrockChat" or model_type == "Bedrock":
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="bedrock-claude-sonnet",
            type=click.Choice(bedrock_models),
            show_choices=False,
        )
        long_model_id = MODEL_ID_TO_LONG_ID[model_id]
        params = dict(
            # Bedrock uses the "model_id" key for what we're calling "long_model_id"
            model_id=long_model_id,
            model_kwargs={"temperature": 0.7},
        )
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    else:
        raise ValueError(f"Unknown model type {model_type}")

    with open(model_cfg, "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Model config written to {model_cfg}")
|
163
|
+
|
164
|
+
|
165
|
+
@llm.command("ls", help="List all of the user-configured models")
def llm_ls(
    all: Annotated[
        bool,
        typer.Option(
            "--all",
            "-a",
            is_flag=True,
            help="List all models, including the default model IDs.",
        ),
    ] = False,
):
    """Print each user-configured model and, optionally, every known model ID.

    Reads every ``*.json`` file in ``MODEL_CONFIG_DIR`` and prints its file
    stem with the ``model_type`` stored inside it.

    Args:
        all: When true, also print each entry of ``MODEL_TYPES``. The name
            shadows the builtin but is kept because it is the command's
            existing parameter name.
    """
    import json

    from janus.llm.models_info import MODEL_CONFIG_DIR, MODEL_TYPES

    print("\n[green]User-configured models[/green]:")
    # Sorted so the listing does not depend on filesystem enumeration order.
    for model_cfg in sorted(MODEL_CONFIG_DIR.glob("*.json")):
        with open(model_cfg, "r") as f:
            cfg = json.load(f)
        print(f"\t[blue]{model_cfg.stem}[/blue]: [purple]{cfg['model_type']}[/purple]")

    if all:
        print("\n[green]Available model IDs[/green]:")
        for model_id, model_type in MODEL_TYPES.items():
            print(f"\t[blue]{model_id}[/blue]: [purple]{model_type}[/purple]")
|
janus/cli/partition.py
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
|
12
|
+
|
13
|
+
def partition(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be partitioned. ",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the partitioned code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit concatenation instead of a backslash continuation, which
            # spliced the next source line (and its indentation) into the help.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    max_tokens: Annotated[
        # Optional because the default is None ("use the model's own maximum").
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
    partition_token_limit: Annotated[
        int,
        typer.Option(
            "--partition-tokens",
            "-pt",
            help="The limit on the number of tokens per partition.",
        ),
    ] = 8192,
    use_janus_inputs: Annotated[
        bool,
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if converter should use janus inputs",
        ),
    ] = False,
):
    """CLI command: build a ``Partitioner`` from the options and run it.

    The refiner names are mapped through ``REFINERS`` and the options are
    forwarded as keyword arguments to ``Partitioner``. Its ``translate`` method
    is then called with ``input_dir``, ``output_dir``, ``failure_dir`` and
    ``overwrite``.

    Raises:
        KeyError: If a refiner name is not a key of ``REFINERS``.
    """
    from janus.converter.partition import Partitioner

    # Resolve refiner names into the objects registered in REFINERS. A new
    # local is used so the default list object is never touched.
    refiners = [REFINERS[name] for name in refiner_types]
    partitioner = Partitioner(
        model=llm_name,
        model_arguments={"temperature": temperature},
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        splitter_type=splitter_type,
        refiner_types=refiners,
        partition_token_limit=partition_token_limit,
        use_janus_inputs=use_janus_inputs,
    )
    partitioner.translate(input_dir, output_dir, failure_dir, overwrite)
|
janus/cli/pipeline.py
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
import json
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
import click
|
6
|
+
import typer
|
7
|
+
from typing_extensions import Annotated
|
8
|
+
|
9
|
+
from janus.cli.constants import CONVERTERS
|
10
|
+
from janus.converter.chain import ConverterChain
|
11
|
+
from janus.utils.enums import LANGUAGES
|
12
|
+
|
13
|
+
|
14
|
+
def instiantiate(x):
    """Recursively turn a JSON-like spec into converter objects.

    A dict containing a ``"type"`` key is treated as a constructor spec: the
    type is looked up in ``CONVERTERS`` and called with the (recursively
    instantiated) ``"args"`` list and ``"kwargs"`` dict, both optional. Any
    other dict or list is rebuilt with its values instantiated, and every other
    value is returned unchanged.

    The input is never modified, so the same spec can be instantiated again or
    inspected afterwards.

    Args:
        x: A dict, list or scalar, typically loaded from a pipeline JSON file.

    Returns:
        The instantiated counterpart of ``x``.

    Raises:
        ValueError: If a spec's ``"type"`` is not a key of ``CONVERTERS``.
    """
    if isinstance(x, list):
        return [instiantiate(item) for item in x]
    if not isinstance(x, dict):
        return x
    if "type" not in x:
        return {key: instiantiate(value) for key, value in x.items()}

    converter_type = x["type"]
    # Fail fast: reject an unknown type before constructing any nested objects.
    if converter_type not in CONVERTERS:
        raise ValueError(f"Error: {converter_type} is not a Converter")

    args = [instiantiate(arg) for arg in x.get("args", [])]
    kwargs = {key: instiantiate(value) for key, value in x.get("kwargs", {}).items()}
    return CONVERTERS[converter_type](*args, **kwargs)
|
32
|
+
|
33
|
+
|
34
|
+
def instiantiate_pipeline(
    pipeline: list[dict],
    language: str = "text",
    model: str = "gpt-4o",
    use_janus_inputs: None | bool = None,
):
    """Build a ``ConverterChain`` from a list of converter specs.

    The first stage receives ``source_language=language``. Every later stage
    receives the ``target_language`` of the converter built before it. All
    stages receive ``model``. These values are written into each stage's
    ``"kwargs"`` dict, which is created when absent.

    Args:
        pipeline: Stage specs, each a dict understood by ``instiantiate``.
        language: Source language for the first stage.
        model: Model name passed to every stage.
        use_janus_inputs: When not ``None``, passed to the first stage as
            ``janus_inputs``.

    Returns:
        A ``ConverterChain`` wrapping the instantiated converters in order.

    Raises:
        IndexError: If ``pipeline`` is empty.
    """
    first_stage = pipeline[0]
    first_kwargs = first_stage.setdefault("kwargs", {})
    first_kwargs.update(source_language=language, model=model)
    if use_janus_inputs is not None:
        first_kwargs.update(janus_inputs=use_janus_inputs)
    print(first_stage)

    converters = [instiantiate(first_stage)]
    for stage in pipeline[1:]:
        # setdefault mirrors the handling of the first stage; indexing
        # stage["kwargs"] directly raised KeyError for stages without kwargs.
        stage.setdefault("kwargs", {}).update(
            source_language=converters[-1].target_language, model=model
        )
        converters.append(instiantiate(stage))
    return ConverterChain(*converters)
|
51
|
+
|
52
|
+
|
53
|
+
def pipeline(
    pipeline_file: Annotated[
        Path, typer.Option("-p", "--pipeline", help="Name of pipeline file to use")
    ],
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during documentation",
        ),
    ] = None,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    use_janus_inputs: Annotated[
        Optional[bool],
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if converter chain should use janus input files",
        ),
    ] = None,
):
    """CLI command: load a pipeline JSON file, build the chain and run it.

    The parsed JSON is handed to ``instiantiate_pipeline`` together with the
    language, model name and janus-inputs flag. The resulting chain's
    ``translate`` method is then called with the directories and the
    ``overwrite`` flag.
    """
    with open(pipeline_file, "r") as spec_file:
        stage_specs = json.load(spec_file)

    chain = instiantiate_pipeline(
        stage_specs,
        language=language,
        model=llm_name,
        use_janus_inputs=use_janus_inputs,
    )
    translate_options = dict(
        input_directory=input_dir,
        output_directory=output_dir,
        failure_directory=failure_dir,
        overwrite=overwrite,
    )
    chain.translate(**translate_options)
|