janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +1 -1
- janus/__main__.py +1 -1
- janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
- janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
- janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
- janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
- janus/_tests/test_cli.py +3 -2
- janus/cli/aggregate.py +135 -0
- janus/cli/cli.py +111 -0
- janus/cli/constants.py +43 -0
- janus/cli/database.py +289 -0
- janus/cli/diagram.py +178 -0
- janus/cli/document.py +174 -0
- janus/cli/embedding.py +122 -0
- janus/cli/llm.py +187 -0
- janus/cli/partition.py +125 -0
- janus/cli/self_eval.py +149 -0
- janus/cli/translate.py +183 -0
- janus/converter/__init__.py +1 -1
- janus/converter/_tests/test_translate.py +2 -0
- janus/converter/converter.py +129 -92
- janus/converter/document.py +21 -14
- janus/converter/evaluate.py +237 -4
- janus/converter/translate.py +3 -3
- janus/embedding/collections.py +1 -1
- janus/language/alc/_tests/alc.asm +3779 -0
- janus/language/alc/_tests/test_alc.py +1 -1
- janus/language/alc/alc.py +9 -4
- janus/language/binary/_tests/hello.bin +0 -0
- janus/language/block.py +47 -12
- janus/language/file.py +1 -1
- janus/language/mumps/_tests/mumps.m +235 -0
- janus/language/splitter.py +31 -23
- janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
- janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
- janus/language/treesitter/_tests/languages/matlab.m +225 -0
- janus/language/treesitter/treesitter.py +9 -1
- janus/llm/models_info.py +26 -13
- janus/metrics/_tests/asm_test_file.asm +10 -0
- janus/metrics/_tests/mumps_test_file.m +6 -0
- janus/metrics/_tests/test_treesitter_metrics.py +1 -1
- janus/metrics/prompts/clarity.txt +8 -0
- janus/metrics/prompts/completeness.txt +16 -0
- janus/metrics/prompts/faithfulness.txt +10 -0
- janus/metrics/prompts/hallucination.txt +16 -0
- janus/metrics/prompts/quality.txt +8 -0
- janus/metrics/prompts/readability.txt +16 -0
- janus/metrics/prompts/usefulness.txt +16 -0
- janus/parsers/code_parser.py +4 -4
- janus/parsers/doc_parser.py +12 -9
- janus/parsers/eval_parsers/incose_parser.py +134 -0
- janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
- janus/parsers/parser.py +7 -0
- janus/parsers/partition_parser.py +47 -13
- janus/parsers/reqs_parser.py +8 -5
- janus/parsers/uml.py +5 -4
- janus/prompts/prompt.py +2 -2
- janus/prompts/templates/README.md +30 -0
- janus/prompts/templates/basic_aggregation/human.txt +6 -0
- janus/prompts/templates/basic_aggregation/system.txt +1 -0
- janus/prompts/templates/basic_refinement/human.txt +14 -0
- janus/prompts/templates/basic_refinement/system.txt +1 -0
- janus/prompts/templates/diagram/human.txt +9 -0
- janus/prompts/templates/diagram/system.txt +1 -0
- janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
- janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
- janus/prompts/templates/document/human.txt +10 -0
- janus/prompts/templates/document/system.txt +1 -0
- janus/prompts/templates/document_cloze/human.txt +11 -0
- janus/prompts/templates/document_cloze/system.txt +1 -0
- janus/prompts/templates/document_cloze/variables.json +4 -0
- janus/prompts/templates/document_cloze/variables_asm.json +4 -0
- janus/prompts/templates/document_inline/human.txt +13 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
- janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
- janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
- janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
- janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
- janus/prompts/templates/multidocument/human.txt +15 -0
- janus/prompts/templates/multidocument/system.txt +1 -0
- janus/prompts/templates/partition/human.txt +22 -0
- janus/prompts/templates/partition/system.txt +1 -0
- janus/prompts/templates/partition/variables.json +4 -0
- janus/prompts/templates/pseudocode/human.txt +7 -0
- janus/prompts/templates/pseudocode/system.txt +7 -0
- janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
- janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
- janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
- janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
- janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
- janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
- janus/prompts/templates/refinement/hallucination/human.txt +13 -0
- janus/prompts/templates/refinement/hallucination/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/human.txt +15 -0
- janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
- janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/system.txt +1 -0
- janus/prompts/templates/refinement/revision/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/revision/system.txt +1 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
- janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
- janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
- janus/prompts/templates/requirements/human.txt +13 -0
- janus/prompts/templates/requirements/system.txt +2 -0
- janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
- janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
- janus/prompts/templates/simple/human.txt +16 -0
- janus/prompts/templates/simple/system.txt +3 -0
- janus/refiners/format.py +49 -0
- janus/refiners/refiner.py +143 -4
- janus/utils/enums.py +140 -111
- janus/utils/logger.py +2 -0
- {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/METADATA +7 -7
- janus_llm-4.3.5.dist-info/RECORD +210 -0
- {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/WHEEL +1 -1
- janus_llm-4.3.5.dist-info/entry_points.txt +3 -0
- janus/cli.py +0 -1343
- janus_llm-4.2.0.dist-info/RECORD +0 -113
- janus_llm-4.2.0.dist-info/entry_points.txt +0 -3
- {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/LICENSE +0 -0
janus/cli/embedding.py
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
import click
|
2
|
+
import typer
|
3
|
+
from typing_extensions import Annotated
|
4
|
+
|
5
|
+
from janus.embedding.embedding_models_info import EmbeddingModelType
|
6
|
+
|
7
|
+
# Typer sub-application that groups the embedding-model commands.
embedding = typer.Typer(
    help="Embedding model commands",
    add_completion=False,
    no_args_is_help=True,
    context_settings={"help_option_names": ["-h", "--help"]},
)


@embedding.command("add", help="Add an embedding model config to janus")
def embedding_add(
    model_name: Annotated[
        str, typer.Argument(help="The user's custom name for the model")
    ],
    model_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="The type of the model",
            click_type=click.Choice([val.value for val in EmbeddingModelType]),
        ),
    ] = "OpenAI",
):
    """Interactively build an embedding model config and save it as JSON.

    The config is written to ``EMBEDDING_MODEL_CONFIG_DIR / "<model_name>.json"``;
    the directory is created if it is missing. Which questions are asked
    depends on ``model_type``.

    Args:
        model_name: The user's alias for the model; used as the config file stem.
        model_type: One of the ``EmbeddingModelType`` values.

    Raises:
        ValueError: If ``model_type`` matches none of the handled model types.
    """
    import json
    from pathlib import Path

    from pydantic import AnyHttpUrl

    from janus.embedding.embedding_models_info import (
        EMBEDDING_COST_PER_MODEL,
        EMBEDDING_MODEL_CONFIG_DIR,
        EMBEDDING_TOKEN_LIMITS,
    )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    EMBEDDING_MODEL_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    model_cfg = EMBEDDING_MODEL_CONFIG_DIR / f"{model_name}.json"

    if model_type in EmbeddingModelType.HuggingFaceInferenceAPI.values:
        hf = typer.style("HuggingFaceInferenceAPI", fg="yellow")
        # value_proc runs the answer through AnyHttpUrl before it is accepted.
        url = typer.prompt(f"Enter the {hf} model's URL", type=str, value_proc=AnyHttpUrl)
        api_model_name = typer.prompt("Enter the model's name", type=str, default="")
        api_key = typer.prompt("Enter the API key", type=str, default="")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=8191, type=int
        )
        in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
        out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
        # NOTE: the API key ends up in the JSON config file in plain text.
        params = dict(
            model_name=api_model_name,
            api_key=api_key,
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": str(url),
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": {"input": in_cost, "output": out_cost},
        }
    elif model_type in EmbeddingModelType.HuggingFaceLocal.values:
        hf = typer.style("HuggingFace", fg="yellow")
        model_id = typer.prompt(
            f"Enter the {hf} model ID",
            default="sentence-transformers/all-MiniLM-L6-v2",
            type=str,
        )
        # Round-trip through Path to normalise whatever the user typed.
        cache_folder = str(
            Path(
                typer.prompt(
                    "Enter the model's cache folder",
                    default=EMBEDDING_MODEL_CONFIG_DIR / "cache",
                    type=str,
                )
            )
        )
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=8191, type=int
        )
        params = dict(
            cache_folder=cache_folder,
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": model_id,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": {"input": 0, "output": 0},
        }
    elif model_type in EmbeddingModelType.OpenAI.values:
        available_models = list(EMBEDDING_COST_PER_MODEL.keys())

        open_ai = typer.style("OpenAI", fg="green")
        prompt = f"Enter the {open_ai} model name"

        # Kept in its own variable so the `model_name` argument (the user's
        # alias, already used for the file name) is not silently rebound.
        openai_model = typer.prompt(
            prompt,
            default="text-embedding-3-small",
            type=click.types.Choice(available_models),
            show_choices=False,
        )
        params = dict(
            model=openai_model,
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": openai_model,
            "model_args": params,
            "token_limit": EMBEDDING_TOKEN_LIMITS[openai_model],
            "model_cost": EMBEDDING_COST_PER_MODEL[openai_model],
        }
    else:
        raise ValueError(f"Unknown model type {model_type}")

    with open(model_cfg, "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Model config written to {model_cfg}")
|
janus/cli/llm.py
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
import click
|
2
|
+
import typer
|
3
|
+
from typing_extensions import Annotated
|
4
|
+
|
5
|
+
from janus.llm.models_info import MODEL_TYPE_CONSTRUCTORS
|
6
|
+
|
7
|
+
# Typer sub-application that groups the LLM commands.
llm = typer.Typer(
    help="LLM commands",
    add_completion=False,
    no_args_is_help=True,
    context_settings={"help_option_names": ["-h", "--help"]},
)


@llm.command("add", help="Add a model config to janus")
def llm_add(
    model_name: Annotated[
        str, typer.Argument(help="The user's custom name of the model")
    ],
    model_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="The type of the model",
            click_type=click.Choice(sorted(MODEL_TYPE_CONSTRUCTORS)),
        ),
    ] = "Azure",
):
    """Interactively build an LLM config and save it as JSON.

    The config is written to ``MODEL_CONFIG_DIR / "<model_name>.json"``; the
    directory is created if it is missing. Which questions are asked depends
    on ``model_type``.

    Args:
        model_name: The user's alias for the model; used only as the config
            file stem, never as a key into the model tables.
        model_type: One of the keys of ``MODEL_TYPE_CONSTRUCTORS``.

    Raises:
        ValueError: If ``model_type`` is not one of the handled types.
    """
    import json

    from janus.llm.models_info import (
        COST_PER_1K_TOKENS,
        MODEL_CONFIG_DIR,
        MODEL_ID_TO_LONG_ID,
        TOKEN_LIMITS,
        azure_models,
        bedrock_models,
        openai_models,
    )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    MODEL_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    model_cfg = MODEL_CONFIG_DIR / f"{model_name}.json"

    if model_type == "HuggingFace":
        url = typer.prompt("Enter the model's URL")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=4096, type=int
        )
        in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
        out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
        params = dict(
            inference_server_url=url,
            max_new_tokens=max_tokens,
            top_k=10,
            top_p=0.95,
            typical_p=0.95,
            temperature=0.01,
            repetition_penalty=1.03,
            timeout=240,
        )
        cfg = {
            "model_type": model_type,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": {"input": in_cost, "output": out_cost},
            "input_token_proportion": 0.4,
        }
    elif model_type == "HuggingFaceLocal":
        model_id = typer.prompt("Enter the model ID")
        task = typer.prompt("Enter the task")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=4096, type=int
        )
        cfg = {
            "model_type": model_type,
            "model_args": {"model_id": model_id, "task": task},
            "token_limit": max_tokens,
            "model_cost": {"input": 0, "output": 0},
            "input_token_proportion": 0.4,
        }
    elif model_type == "OpenAI":
        print("DEPRECATED: Use 'Azure' instead. CTRL+C to exit.")
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="gpt-4o",
            type=click.Choice(openai_models),
            show_choices=False,
        )
        # Resolve everything from the chosen model ID, not from the user's
        # alias: the alias is arbitrary and is generally not a key of
        # TOKEN_LIMITS / COST_PER_1K_TOKENS. Fall back to the ID itself in case
        # the ID has no separate long form in MODEL_ID_TO_LONG_ID.
        long_model_id = MODEL_ID_TO_LONG_ID.get(model_id, model_id)
        params = dict(
            model_name=long_model_id,
            temperature=0.7,
            n=1,
        )
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    elif model_type == "Azure":
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="gpt-4o",
            type=click.Choice(azure_models),
            show_choices=False,
        )
        long_model_id = MODEL_ID_TO_LONG_ID[model_id]
        params = dict(
            # Azure uses the "azure_deployment" key for what we're calling "long_model_id"
            azure_deployment=long_model_id,
            temperature=0.7,
            n=1,
        )
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    elif model_type in ("BedrockChat", "Bedrock"):
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="bedrock-claude-sonnet",
            type=click.Choice(bedrock_models),
            show_choices=False,
        )
        long_model_id = MODEL_ID_TO_LONG_ID[model_id]
        params = dict(
            # Bedrock uses the "model_id" key for what we're calling "long_model_id"
            model_id=long_model_id,
            model_kwargs={"temperature": 0.7},
        )
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    else:
        raise ValueError(f"Unknown model type {model_type}")

    with open(model_cfg, "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Model config written to {model_cfg}")
|
158
|
+
|
159
|
+
|
160
|
+
@llm.command("ls", help="List all of the user-configured models")
def llm_ls(
    all: Annotated[  # noqa: A002 - name kept for backward compatibility
        bool,
        typer.Option(
            "--all",
            "-a",
            is_flag=True,
            help="List all models, including the default model IDs.",
        ),
    ] = False,
):
    """Print every user-configured model and, optionally, all known model IDs.

    Each ``*.json`` file in ``MODEL_CONFIG_DIR`` is loaded and reported as
    ``<file stem>: <model_type>``. With ``--all`` the entries of
    ``MODEL_TYPES`` are listed as well.

    Args:
        all: When true, also list the model IDs found in ``MODEL_TYPES``.
    """
    import json

    from janus.cli.constants import MODEL_CONFIG_DIR
    from janus.llm.models_info import MODEL_TYPES

    # This module does not import rich's print, so rich-style "[green]..."
    # markup would be shown literally. Colour with typer.style instead;
    # typer.echo drops the colour codes when the output is not a terminal.
    def _entry(name, kind):
        label = typer.style(str(name), fg="blue")
        value = typer.style(str(kind), fg="magenta")
        return f"\t{label}: {value}"

    heading = typer.style("User-configured models", fg="green")
    typer.echo(f"\n{heading}:")
    # Sorted so the listing is stable from run to run.
    for model_cfg in sorted(MODEL_CONFIG_DIR.glob("*.json")):
        with open(model_cfg, "r") as f:
            cfg = json.load(f)
        typer.echo(_entry(model_cfg.stem, cfg["model_type"]))

    if all:
        heading = typer.style("Available model IDs", fg="green")
        typer.echo(f"\n{heading}:")
        for model_id, model_type in MODEL_TYPES.items():
            typer.echo(_entry(model_id, model_type))
|
janus/cli/partition.py
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
|
12
|
+
|
13
|
+
# CLI callback: builds a Partitioner from the command-line options and runs it
# over `input_dir`, writing results to `output_dir`.
# Documented with comments rather than a docstring because this function is
# registered as a command outside this module, and Typer falls back to the
# docstring as help text when no explicit help is given at registration.
def partition(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be partitioned. ",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the partitioned code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit concatenation instead of a backslash continuation, which
            # would embed the next line's indentation in the help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    max_tokens: Annotated[
        Optional[int],  # defaults to None, so the annotation must allow it
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecificed, model's default max will be used.",
        ),
    ] = None,
    partition_token_limit: Annotated[
        int,
        typer.Option(
            "--partition-tokens",
            "-pt",
            help="The limit on the number of tokens per partition.",
        ),
    ] = 8192,
):
    # Imported here rather than at module level, as in the original.
    from janus.converter.partition import Partitioner

    # Map the refiner names given on the command line to their REFINERS
    # entries. A new list is bound, so the default list is never mutated.
    refiner_types = [REFINERS[r] for r in refiner_types]
    partitioner = Partitioner(
        model=llm_name,
        model_arguments=dict(temperature=temperature),
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
        partition_token_limit=partition_token_limit,
    )
    partitioner.translate(input_dir, output_dir, failure_dir, overwrite)
|
janus/cli/self_eval.py
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
|
12
|
+
|
13
|
+
# CLI callback: builds a RequirementEvaluator ("incose") or an
# InlineCommentEvaluator ("comments") from the command-line options and runs it
# over `input_dir`, writing results to `output_dir`.
# Documented with comments rather than a docstring because this function is
# registered as a command outside this module, and Typer falls back to the
# docstring as help text when no explicit help is given at registration.
def llm_self_eval(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be evaluated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the evaluations in."
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    evaluation_type: Annotated[
        str,
        typer.Option(
            "--evaluation-type",
            "-e",
            help="Type of output to evaluate.",
            click_type=click.Choice(["incose", "comments"]),
        ),
    ] = "incose",
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],  # defaults to None, so the annotation must allow it
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit concatenation instead of a backslash continuation, which
            # would embed the next line's indentation in the help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    eval_items_per_request: Annotated[
        Optional[int],
        typer.Option(
            "--eval-items-per-request",
            "-rc",
            help="The maximum number of evaluation items per request",
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecificed, model's default max will be used.",
        ),
    ] = None,
):
    # Imported here rather than at module level, as in the original.
    from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator

    # Map the refiner names given on the command line to their REFINERS
    # entries. A new list is bound, so the default list is never mutated.
    refiner_types = [REFINERS[r] for r in refiner_types]
    kwargs = dict(
        eval_items_per_request=eval_items_per_request,
        model=llm_name,
        model_arguments=dict(temperature=temperature),
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
    )

    # The evaluation type selects which evaluator class is built.
    if evaluation_type == "incose":
        evaluator = RequirementEvaluator(**kwargs)
    elif evaluation_type == "comments":
        evaluator = InlineCommentEvaluator(**kwargs)
    else:
        # The CLI's Choice type prevents this; a direct Python call could
        # still pass anything, which used to leave `evaluator` unbound.
        raise ValueError(f"Unknown evaluation type {evaluation_type}")

    evaluator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|