janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/__main__.py +1 -1
- janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
- janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
- janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
- janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
- janus/_tests/test_cli.py +3 -2
- janus/cli/aggregate.py +135 -0
- janus/cli/cli.py +117 -0
- janus/cli/constants.py +49 -0
- janus/cli/database.py +289 -0
- janus/cli/diagram.py +207 -0
- janus/cli/document.py +183 -0
- janus/cli/embedding.py +122 -0
- janus/cli/llm.py +191 -0
- janus/cli/partition.py +134 -0
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +147 -0
- janus/cli/translate.py +192 -0
- janus/converter/__init__.py +1 -1
- janus/converter/_tests/test_translate.py +7 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +444 -153
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +27 -16
- janus/converter/evaluate.py +143 -144
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +3 -59
- janus/embedding/collections.py +1 -1
- janus/language/alc/_tests/alc.asm +3779 -0
- janus/language/binary/_tests/hello.bin +0 -0
- janus/language/block.py +78 -14
- janus/language/file.py +1 -1
- janus/language/mumps/_tests/mumps.m +235 -0
- janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
- janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
- janus/language/treesitter/_tests/languages/matlab.m +225 -0
- janus/llm/models_info.py +9 -1
- janus/metrics/_tests/asm_test_file.asm +10 -0
- janus/metrics/_tests/mumps_test_file.m +6 -0
- janus/metrics/_tests/test_treesitter_metrics.py +1 -1
- janus/metrics/metric.py +47 -124
- janus/metrics/prompts/clarity.txt +8 -0
- janus/metrics/prompts/completeness.txt +16 -0
- janus/metrics/prompts/faithfulness.txt +10 -0
- janus/metrics/prompts/hallucination.txt +16 -0
- janus/metrics/prompts/quality.txt +8 -0
- janus/metrics/prompts/readability.txt +16 -0
- janus/metrics/prompts/usefulness.txt +16 -0
- janus/parsers/code_parser.py +4 -4
- janus/parsers/doc_parser.py +12 -9
- janus/parsers/parser.py +7 -0
- janus/parsers/partition_parser.py +6 -4
- janus/parsers/reqs_parser.py +11 -8
- janus/parsers/uml.py +5 -4
- janus/prompts/prompt.py +2 -2
- janus/prompts/templates/README.md +30 -0
- janus/prompts/templates/basic_aggregation/human.txt +6 -0
- janus/prompts/templates/basic_aggregation/system.txt +1 -0
- janus/prompts/templates/basic_refinement/human.txt +14 -0
- janus/prompts/templates/basic_refinement/system.txt +1 -0
- janus/prompts/templates/diagram/human.txt +9 -0
- janus/prompts/templates/diagram/system.txt +1 -0
- janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
- janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
- janus/prompts/templates/document/human.txt +10 -0
- janus/prompts/templates/document/system.txt +1 -0
- janus/prompts/templates/document_cloze/human.txt +11 -0
- janus/prompts/templates/document_cloze/system.txt +1 -0
- janus/prompts/templates/document_cloze/variables.json +4 -0
- janus/prompts/templates/document_cloze/variables_asm.json +4 -0
- janus/prompts/templates/document_inline/human.txt +13 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
- janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
- janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
- janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
- janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
- janus/prompts/templates/multidocument/human.txt +15 -0
- janus/prompts/templates/multidocument/system.txt +1 -0
- janus/prompts/templates/partition/human.txt +22 -0
- janus/prompts/templates/partition/system.txt +1 -0
- janus/prompts/templates/partition/variables.json +4 -0
- janus/prompts/templates/pseudocode/human.txt +7 -0
- janus/prompts/templates/pseudocode/system.txt +7 -0
- janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
- janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
- janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
- janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
- janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
- janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
- janus/prompts/templates/refinement/hallucination/human.txt +13 -0
- janus/prompts/templates/refinement/hallucination/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/human.txt +15 -0
- janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
- janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/system.txt +1 -0
- janus/prompts/templates/refinement/revision/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/revision/system.txt +1 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
- janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
- janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
- janus/prompts/templates/requirements/human.txt +13 -0
- janus/prompts/templates/requirements/system.txt +2 -0
- janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
- janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
- janus/prompts/templates/simple/human.txt +16 -0
- janus/prompts/templates/simple/system.txt +3 -0
- janus/refiners/format.py +49 -0
- janus/refiners/refiner.py +113 -4
- janus/utils/enums.py +127 -112
- janus/utils/logger.py +2 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
- janus_llm-4.4.5.dist-info/RECORD +210 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
- janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
- janus/cli.py +0 -1488
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- janus_llm-4.3.1.dist-info/RECORD +0 -115
- janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/cli.py
DELETED
@@ -1,1488 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
import logging
|
3
|
-
import os
|
4
|
-
import subprocess # nosec
|
5
|
-
from pathlib import Path
|
6
|
-
from typing import List, Optional
|
7
|
-
|
8
|
-
import click
|
9
|
-
import typer
|
10
|
-
from pydantic import AnyHttpUrl
|
11
|
-
from rich import print
|
12
|
-
from rich.console import Console
|
13
|
-
from rich.prompt import Confirm
|
14
|
-
from typing_extensions import Annotated
|
15
|
-
|
16
|
-
import janus.refiners.refiner
|
17
|
-
import janus.refiners.uml
|
18
|
-
from janus.converter.aggregator import Aggregator
|
19
|
-
from janus.converter.converter import Converter
|
20
|
-
from janus.converter.diagram import DiagramGenerator
|
21
|
-
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
22
|
-
from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator
|
23
|
-
from janus.converter.partition import Partitioner
|
24
|
-
from janus.converter.requirements import RequirementsDocumenter
|
25
|
-
from janus.converter.translate import Translator
|
26
|
-
from janus.embedding.collections import Collections
|
27
|
-
from janus.embedding.database import ChromaEmbeddingDatabase
|
28
|
-
from janus.embedding.embedding_models_info import (
|
29
|
-
EMBEDDING_COST_PER_MODEL,
|
30
|
-
EMBEDDING_MODEL_CONFIG_DIR,
|
31
|
-
EMBEDDING_TOKEN_LIMITS,
|
32
|
-
EmbeddingModelType,
|
33
|
-
)
|
34
|
-
from janus.embedding.vectorize import ChromaDBVectorizer
|
35
|
-
from janus.language.binary import BinarySplitter
|
36
|
-
from janus.language.mumps import MumpsSplitter
|
37
|
-
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
38
|
-
from janus.language.treesitter import TreeSitterSplitter
|
39
|
-
from janus.llm.model_callbacks import COST_PER_1K_TOKENS
|
40
|
-
from janus.llm.models_info import (
|
41
|
-
MODEL_CONFIG_DIR,
|
42
|
-
MODEL_ID_TO_LONG_ID,
|
43
|
-
MODEL_TYPE_CONSTRUCTORS,
|
44
|
-
MODEL_TYPES,
|
45
|
-
TOKEN_LIMITS,
|
46
|
-
azure_models,
|
47
|
-
bedrock_models,
|
48
|
-
openai_models,
|
49
|
-
)
|
50
|
-
from janus.metrics.cli import evaluate
|
51
|
-
from janus.utils.enums import LANGUAGES
|
52
|
-
from janus.utils.logger import create_logger
|
53
|
-
|
54
|
-
httpx_logger = logging.getLogger("httpx")
|
55
|
-
httpx_logger.setLevel(logging.WARNING)
|
56
|
-
|
57
|
-
log = create_logger(__name__)
|
58
|
-
homedir = Path.home().expanduser()
|
59
|
-
|
60
|
-
janus_dir = homedir / ".janus"
|
61
|
-
if not janus_dir.exists():
|
62
|
-
janus_dir.mkdir(parents=True)
|
63
|
-
|
64
|
-
db_file = janus_dir / ".db"
|
65
|
-
if not db_file.exists():
|
66
|
-
with open(db_file, "w") as f:
|
67
|
-
f.write(str(janus_dir / "chroma.db"))
|
68
|
-
|
69
|
-
with open(db_file, "r") as f:
|
70
|
-
db_loc = f.read()
|
71
|
-
|
72
|
-
collections_config_file = Path(db_loc) / "collections.json"
|
73
|
-
|
74
|
-
|
75
|
-
def get_subclasses(cls):
|
76
|
-
return set(cls.__subclasses__()).union(
|
77
|
-
set(s for c in cls.__subclasses__() for s in get_subclasses(c))
|
78
|
-
)
|
79
|
-
|
80
|
-
|
81
|
-
REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
|
82
|
-
{janus.refiners.refiner.JanusRefiner}
|
83
|
-
)
|
84
|
-
REFINERS = {r.__name__: r for r in REFINER_TYPES}
|
85
|
-
|
86
|
-
|
87
|
-
def get_collections_config():
|
88
|
-
if collections_config_file.exists():
|
89
|
-
with open(collections_config_file, "r") as f:
|
90
|
-
config = json.load(f)
|
91
|
-
else:
|
92
|
-
config = {}
|
93
|
-
return config
|
94
|
-
|
95
|
-
|
96
|
-
app = typer.Typer(
|
97
|
-
help=(
|
98
|
-
"[bold][dark_orange]Janus[/dark_orange] is a CLI for translating, "
|
99
|
-
"documenting, and diagramming code using large language models.[/bold]"
|
100
|
-
),
|
101
|
-
add_completion=False,
|
102
|
-
no_args_is_help=True,
|
103
|
-
context_settings={"help_option_names": ["-h", "--help"]},
|
104
|
-
rich_markup_mode="rich",
|
105
|
-
)
|
106
|
-
|
107
|
-
|
108
|
-
db = typer.Typer(
|
109
|
-
help="Database commands",
|
110
|
-
add_completion=False,
|
111
|
-
no_args_is_help=True,
|
112
|
-
context_settings={"help_option_names": ["-h", "--help"]},
|
113
|
-
)
|
114
|
-
llm = typer.Typer(
|
115
|
-
help="LLM commands",
|
116
|
-
add_completion=False,
|
117
|
-
no_args_is_help=True,
|
118
|
-
context_settings={"help_option_names": ["-h", "--help"]},
|
119
|
-
)
|
120
|
-
|
121
|
-
embedding = typer.Typer(
|
122
|
-
help="Embedding model commands",
|
123
|
-
add_completion=False,
|
124
|
-
no_args_is_help=True,
|
125
|
-
context_settings={"help_option_names": ["-h", "--help"]},
|
126
|
-
)
|
127
|
-
|
128
|
-
|
129
|
-
def version_callback(value: bool) -> None:
|
130
|
-
if value:
|
131
|
-
from . import __version__ as version
|
132
|
-
|
133
|
-
print(f"Janus CLI [blue]v{version}[/blue]")
|
134
|
-
raise typer.Exit()
|
135
|
-
|
136
|
-
|
137
|
-
@app.callback()
|
138
|
-
def common(
|
139
|
-
ctx: typer.Context,
|
140
|
-
version: bool = typer.Option(
|
141
|
-
None,
|
142
|
-
"--version",
|
143
|
-
"-v",
|
144
|
-
callback=version_callback,
|
145
|
-
help="Print the version and exit.",
|
146
|
-
),
|
147
|
-
) -> None:
|
148
|
-
"""A function for getting the app version
|
149
|
-
|
150
|
-
This will call the version_callback function to print the version and exit.
|
151
|
-
|
152
|
-
Arguments:
|
153
|
-
ctx: The typer context
|
154
|
-
version: A boolean flag for the version
|
155
|
-
"""
|
156
|
-
pass
|
157
|
-
|
158
|
-
|
159
|
-
@app.command(
|
160
|
-
help="Translate code from one language to another using an LLM.",
|
161
|
-
no_args_is_help=True,
|
162
|
-
)
|
163
|
-
def translate(
|
164
|
-
input_dir: Annotated[
|
165
|
-
Path,
|
166
|
-
typer.Option(
|
167
|
-
"--input",
|
168
|
-
"-i",
|
169
|
-
help="The directory containing the source code to be translated. "
|
170
|
-
"The files should all be in one flat directory.",
|
171
|
-
),
|
172
|
-
],
|
173
|
-
source_lang: Annotated[
|
174
|
-
str,
|
175
|
-
typer.Option(
|
176
|
-
"--source-language",
|
177
|
-
"-s",
|
178
|
-
help="The language of the source code.",
|
179
|
-
click_type=click.Choice(sorted(LANGUAGES)),
|
180
|
-
),
|
181
|
-
],
|
182
|
-
output_dir: Annotated[
|
183
|
-
Path,
|
184
|
-
typer.Option(
|
185
|
-
"--output", "-o", help="The directory to store the translated code in."
|
186
|
-
),
|
187
|
-
],
|
188
|
-
target_lang: Annotated[
|
189
|
-
str,
|
190
|
-
typer.Option(
|
191
|
-
"--target-language",
|
192
|
-
"-t",
|
193
|
-
help="The desired output language to translate the source code to. The "
|
194
|
-
"format can follow a 'language-version' syntax. Use 'text' to get plaintext"
|
195
|
-
"results as returned by the LLM. Examples: `python-3.10`, `mumps`, `java-10`,"
|
196
|
-
"text.",
|
197
|
-
),
|
198
|
-
],
|
199
|
-
llm_name: Annotated[
|
200
|
-
str,
|
201
|
-
typer.Option(
|
202
|
-
"--llm",
|
203
|
-
"-L",
|
204
|
-
help="The custom name of the model set with 'janus llm add'.",
|
205
|
-
),
|
206
|
-
],
|
207
|
-
max_prompts: Annotated[
|
208
|
-
int,
|
209
|
-
typer.Option(
|
210
|
-
"--max-prompts",
|
211
|
-
"-m",
|
212
|
-
help="The maximum number of times to prompt a model on one functional block "
|
213
|
-
"before exiting the application. This is to prevent wasting too much money.",
|
214
|
-
),
|
215
|
-
] = 10,
|
216
|
-
overwrite: Annotated[
|
217
|
-
bool,
|
218
|
-
typer.Option(
|
219
|
-
"--overwrite/--preserve",
|
220
|
-
help="Whether to overwrite existing files in the output directory",
|
221
|
-
),
|
222
|
-
] = False,
|
223
|
-
skip_context: Annotated[
|
224
|
-
bool,
|
225
|
-
typer.Option(
|
226
|
-
"--skip-context",
|
227
|
-
help="Prompts will include any context information associated with source"
|
228
|
-
" code blocks, unless this option is specified",
|
229
|
-
),
|
230
|
-
] = False,
|
231
|
-
temp: Annotated[
|
232
|
-
float,
|
233
|
-
typer.Option("--temperature", "-T", help="Sampling temperature.", min=0, max=2),
|
234
|
-
] = 0.7,
|
235
|
-
prompt_template: Annotated[
|
236
|
-
str,
|
237
|
-
typer.Option(
|
238
|
-
"--prompt-template",
|
239
|
-
"-p",
|
240
|
-
help="Name of the Janus prompt template directory or "
|
241
|
-
"path to a directory containing those template files.",
|
242
|
-
),
|
243
|
-
] = "simple",
|
244
|
-
collection: Annotated[
|
245
|
-
str,
|
246
|
-
typer.Option(
|
247
|
-
"--collection",
|
248
|
-
"-c",
|
249
|
-
help="If set, will put the translated result into a Chroma DB "
|
250
|
-
"collection with the name provided.",
|
251
|
-
),
|
252
|
-
] = None,
|
253
|
-
splitter_type: Annotated[
|
254
|
-
str,
|
255
|
-
typer.Option(
|
256
|
-
"-S",
|
257
|
-
"--splitter",
|
258
|
-
help="Name of custom splitter to use",
|
259
|
-
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
260
|
-
),
|
261
|
-
] = "file",
|
262
|
-
refiner_types: Annotated[
|
263
|
-
list[str],
|
264
|
-
typer.Option(
|
265
|
-
"-r",
|
266
|
-
"--refiner",
|
267
|
-
help="List of refiner types to use. Add -r for each refiner to use in\
|
268
|
-
refinement chain",
|
269
|
-
click_type=click.Choice(list(REFINERS.keys())),
|
270
|
-
),
|
271
|
-
] = ["JanusRefiner"],
|
272
|
-
retriever_type: Annotated[
|
273
|
-
str,
|
274
|
-
typer.Option(
|
275
|
-
"-R",
|
276
|
-
"--retriever",
|
277
|
-
help="Name of custom retriever to use",
|
278
|
-
click_type=click.Choice(["active_usings", "language_docs"]),
|
279
|
-
),
|
280
|
-
] = None,
|
281
|
-
max_tokens: Annotated[
|
282
|
-
int,
|
283
|
-
typer.Option(
|
284
|
-
"--max-tokens",
|
285
|
-
"-M",
|
286
|
-
help="The maximum number of tokens the model will take in. "
|
287
|
-
"If unspecificed, model's default max will be used.",
|
288
|
-
),
|
289
|
-
] = None,
|
290
|
-
):
|
291
|
-
refiner_types = [REFINERS[r] for r in refiner_types]
|
292
|
-
try:
|
293
|
-
target_language, target_version = target_lang.split("-")
|
294
|
-
except ValueError:
|
295
|
-
target_language = target_lang
|
296
|
-
target_version = None
|
297
|
-
# make sure not overwriting input
|
298
|
-
if source_lang.lower() == target_language.lower() and input_dir == output_dir:
|
299
|
-
log.error("Output files would overwrite input! Aborting...")
|
300
|
-
raise ValueError
|
301
|
-
|
302
|
-
model_arguments = dict(temperature=temp)
|
303
|
-
collections_config = get_collections_config()
|
304
|
-
translator = Translator(
|
305
|
-
model=llm_name,
|
306
|
-
model_arguments=model_arguments,
|
307
|
-
source_language=source_lang,
|
308
|
-
target_language=target_language,
|
309
|
-
target_version=target_version,
|
310
|
-
max_prompts=max_prompts,
|
311
|
-
max_tokens=max_tokens,
|
312
|
-
prompt_template=prompt_template,
|
313
|
-
db_path=db_loc,
|
314
|
-
db_config=collections_config,
|
315
|
-
splitter_type=splitter_type,
|
316
|
-
refiner_types=refiner_types,
|
317
|
-
retriever_type=retriever_type,
|
318
|
-
)
|
319
|
-
translator.translate(input_dir, output_dir, overwrite, collection)
|
320
|
-
|
321
|
-
|
322
|
-
@app.command(
|
323
|
-
help="Document input code using an LLM.",
|
324
|
-
no_args_is_help=True,
|
325
|
-
)
|
326
|
-
def document(
|
327
|
-
input_dir: Annotated[
|
328
|
-
Path,
|
329
|
-
typer.Option(
|
330
|
-
"--input",
|
331
|
-
"-i",
|
332
|
-
help="The directory containing the source code to be translated. "
|
333
|
-
"The files should all be in one flat directory.",
|
334
|
-
),
|
335
|
-
],
|
336
|
-
language: Annotated[
|
337
|
-
str,
|
338
|
-
typer.Option(
|
339
|
-
"--language",
|
340
|
-
"-l",
|
341
|
-
help="The language of the source code.",
|
342
|
-
click_type=click.Choice(sorted(LANGUAGES)),
|
343
|
-
),
|
344
|
-
],
|
345
|
-
output_dir: Annotated[
|
346
|
-
Path,
|
347
|
-
typer.Option(
|
348
|
-
"--output-dir", "-o", help="The directory to store the translated code in."
|
349
|
-
),
|
350
|
-
],
|
351
|
-
llm_name: Annotated[
|
352
|
-
str,
|
353
|
-
typer.Option(
|
354
|
-
"--llm",
|
355
|
-
"-L",
|
356
|
-
help="The custom name of the model set with 'janus llm add'.",
|
357
|
-
),
|
358
|
-
],
|
359
|
-
max_prompts: Annotated[
|
360
|
-
int,
|
361
|
-
typer.Option(
|
362
|
-
"--max-prompts",
|
363
|
-
"-m",
|
364
|
-
help="The maximum number of times to prompt a model on one functional block "
|
365
|
-
"before exiting the application. This is to prevent wasting too much money.",
|
366
|
-
),
|
367
|
-
] = 10,
|
368
|
-
overwrite: Annotated[
|
369
|
-
bool,
|
370
|
-
typer.Option(
|
371
|
-
"--overwrite/--preserve",
|
372
|
-
help="Whether to overwrite existing files in the output directory",
|
373
|
-
),
|
374
|
-
] = False,
|
375
|
-
doc_mode: Annotated[
|
376
|
-
str,
|
377
|
-
typer.Option(
|
378
|
-
"--doc-mode",
|
379
|
-
"-d",
|
380
|
-
help="The documentation mode.",
|
381
|
-
click_type=click.Choice(["madlibs", "summary", "multidoc", "requirements"]),
|
382
|
-
),
|
383
|
-
] = "madlibs",
|
384
|
-
comments_per_request: Annotated[
|
385
|
-
int,
|
386
|
-
typer.Option(
|
387
|
-
"--comments-per-request",
|
388
|
-
"-rc",
|
389
|
-
help="The maximum number of comments to generate per request when using "
|
390
|
-
"MadLibs documentation mode.",
|
391
|
-
),
|
392
|
-
] = None,
|
393
|
-
drop_comments: Annotated[
|
394
|
-
bool,
|
395
|
-
typer.Option(
|
396
|
-
"--drop-comments/--keep-comments",
|
397
|
-
help="Whether to drop or keep comments in the code sent to the LLM",
|
398
|
-
),
|
399
|
-
] = False,
|
400
|
-
temperature: Annotated[
|
401
|
-
float,
|
402
|
-
typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
|
403
|
-
] = 0.7,
|
404
|
-
collection: Annotated[
|
405
|
-
str,
|
406
|
-
typer.Option(
|
407
|
-
"--collection",
|
408
|
-
"-c",
|
409
|
-
help="If set, will put the translated result into a Chroma DB "
|
410
|
-
"collection with the name provided.",
|
411
|
-
),
|
412
|
-
] = None,
|
413
|
-
splitter_type: Annotated[
|
414
|
-
str,
|
415
|
-
typer.Option(
|
416
|
-
"-S",
|
417
|
-
"--splitter",
|
418
|
-
help="Name of custom splitter to use",
|
419
|
-
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
420
|
-
),
|
421
|
-
] = "file",
|
422
|
-
refiner_types: Annotated[
|
423
|
-
list[str],
|
424
|
-
typer.Option(
|
425
|
-
"-r",
|
426
|
-
"--refiner",
|
427
|
-
help="List of refiner types to use. Add -r for each refiner to use in\
|
428
|
-
refinement chain",
|
429
|
-
click_type=click.Choice(list(REFINERS.keys())),
|
430
|
-
),
|
431
|
-
] = ["JanusRefiner"],
|
432
|
-
retriever_type: Annotated[
|
433
|
-
str,
|
434
|
-
typer.Option(
|
435
|
-
"-R",
|
436
|
-
"--retriever",
|
437
|
-
help="Name of custom retriever to use",
|
438
|
-
click_type=click.Choice(["active_usings", "language_docs"]),
|
439
|
-
),
|
440
|
-
] = None,
|
441
|
-
max_tokens: Annotated[
|
442
|
-
int,
|
443
|
-
typer.Option(
|
444
|
-
"--max-tokens",
|
445
|
-
"-M",
|
446
|
-
help="The maximum number of tokens the model will take in. "
|
447
|
-
"If unspecificed, model's default max will be used.",
|
448
|
-
),
|
449
|
-
] = None,
|
450
|
-
):
|
451
|
-
refiner_types = [REFINERS[r] for r in refiner_types]
|
452
|
-
model_arguments = dict(temperature=temperature)
|
453
|
-
collections_config = get_collections_config()
|
454
|
-
kwargs = dict(
|
455
|
-
model=llm_name,
|
456
|
-
model_arguments=model_arguments,
|
457
|
-
source_language=language,
|
458
|
-
max_prompts=max_prompts,
|
459
|
-
max_tokens=max_tokens,
|
460
|
-
db_path=db_loc,
|
461
|
-
db_config=collections_config,
|
462
|
-
splitter_type=splitter_type,
|
463
|
-
refiner_types=refiner_types,
|
464
|
-
retriever_type=retriever_type,
|
465
|
-
)
|
466
|
-
if doc_mode == "madlibs":
|
467
|
-
documenter = MadLibsDocumenter(
|
468
|
-
comments_per_request=comments_per_request, **kwargs
|
469
|
-
)
|
470
|
-
elif doc_mode == "multidoc":
|
471
|
-
documenter = MultiDocumenter(drop_comments=drop_comments, **kwargs)
|
472
|
-
elif doc_mode == "requirements":
|
473
|
-
documenter = RequirementsDocumenter(drop_comments=drop_comments, **kwargs)
|
474
|
-
else:
|
475
|
-
documenter = Documenter(drop_comments=drop_comments, **kwargs)
|
476
|
-
|
477
|
-
documenter.translate(input_dir, output_dir, overwrite, collection)
|
478
|
-
|
479
|
-
|
480
|
-
@app.command()
|
481
|
-
def aggregate(
|
482
|
-
input_dir: Annotated[
|
483
|
-
Path,
|
484
|
-
typer.Option(
|
485
|
-
"--input",
|
486
|
-
"-i",
|
487
|
-
help="The directory containing the source code to be translated. "
|
488
|
-
"The files should all be in one flat directory.",
|
489
|
-
),
|
490
|
-
],
|
491
|
-
language: Annotated[
|
492
|
-
str,
|
493
|
-
typer.Option(
|
494
|
-
"--language",
|
495
|
-
"-l",
|
496
|
-
help="The language of the source code.",
|
497
|
-
click_type=click.Choice(sorted(LANGUAGES)),
|
498
|
-
),
|
499
|
-
],
|
500
|
-
output_dir: Annotated[
|
501
|
-
Path,
|
502
|
-
typer.Option(
|
503
|
-
"--output-dir", "-o", help="The directory to store the translated code in."
|
504
|
-
),
|
505
|
-
],
|
506
|
-
llm_name: Annotated[
|
507
|
-
str,
|
508
|
-
typer.Option(
|
509
|
-
"--llm",
|
510
|
-
"-L",
|
511
|
-
help="The custom name of the model set with 'janus llm add'.",
|
512
|
-
),
|
513
|
-
],
|
514
|
-
max_prompts: Annotated[
|
515
|
-
int,
|
516
|
-
typer.Option(
|
517
|
-
"--max-prompts",
|
518
|
-
"-m",
|
519
|
-
help="The maximum number of times to prompt a model on one functional block "
|
520
|
-
"before exiting the application. This is to prevent wasting too much money.",
|
521
|
-
),
|
522
|
-
] = 10,
|
523
|
-
overwrite: Annotated[
|
524
|
-
bool,
|
525
|
-
typer.Option(
|
526
|
-
"--overwrite/--preserve",
|
527
|
-
help="Whether to overwrite existing files in the output directory",
|
528
|
-
),
|
529
|
-
] = False,
|
530
|
-
temperature: Annotated[
|
531
|
-
float,
|
532
|
-
typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
|
533
|
-
] = 0.7,
|
534
|
-
collection: Annotated[
|
535
|
-
str,
|
536
|
-
typer.Option(
|
537
|
-
"--collection",
|
538
|
-
"-c",
|
539
|
-
help="If set, will put the translated result into a Chroma DB "
|
540
|
-
"collection with the name provided.",
|
541
|
-
),
|
542
|
-
] = None,
|
543
|
-
splitter_type: Annotated[
|
544
|
-
str,
|
545
|
-
typer.Option(
|
546
|
-
"-S",
|
547
|
-
"--splitter",
|
548
|
-
help="Name of custom splitter to use",
|
549
|
-
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
550
|
-
),
|
551
|
-
] = "file",
|
552
|
-
intermediate_converters: Annotated[
|
553
|
-
List[str],
|
554
|
-
typer.Option(
|
555
|
-
"-C",
|
556
|
-
"--converter",
|
557
|
-
help="Name of an intermediate converter to use",
|
558
|
-
click_type=click.Choice([c.__name__ for c in get_subclasses(Converter)]),
|
559
|
-
),
|
560
|
-
] = ["Documenter"],
|
561
|
-
):
|
562
|
-
converter_subclasses = get_subclasses(Converter)
|
563
|
-
converter_subclasses_map = {c.__name__: c for c in converter_subclasses}
|
564
|
-
model_arguments = dict(temperature=temperature)
|
565
|
-
collections_config = get_collections_config()
|
566
|
-
converters = []
|
567
|
-
for ic in intermediate_converters:
|
568
|
-
converters.append(
|
569
|
-
converter_subclasses_map[ic](
|
570
|
-
model=llm_name,
|
571
|
-
model_arguments=model_arguments,
|
572
|
-
source_language=language,
|
573
|
-
max_prompts=max_prompts,
|
574
|
-
db_path=db_loc,
|
575
|
-
db_config=collections_config,
|
576
|
-
splitter_type=splitter_type,
|
577
|
-
)
|
578
|
-
)
|
579
|
-
|
580
|
-
aggregator = Aggregator(
|
581
|
-
intermediate_converters=converters,
|
582
|
-
model=llm_name,
|
583
|
-
model_arguments=model_arguments,
|
584
|
-
source_language=language,
|
585
|
-
max_prompts=max_prompts,
|
586
|
-
db_path=db_loc,
|
587
|
-
db_config=collections_config,
|
588
|
-
splitter_type=splitter_type,
|
589
|
-
prompt_template="basic_aggregation",
|
590
|
-
)
|
591
|
-
aggregator.translate(input_dir, output_dir, overwrite, collection)
|
592
|
-
|
593
|
-
|
594
|
-
@app.command(
|
595
|
-
help="Partition input code using an LLM.",
|
596
|
-
no_args_is_help=True,
|
597
|
-
)
|
598
|
-
def partition(
|
599
|
-
input_dir: Annotated[
|
600
|
-
Path,
|
601
|
-
typer.Option(
|
602
|
-
"--input",
|
603
|
-
"-i",
|
604
|
-
help="The directory containing the source code to be partitioned. ",
|
605
|
-
),
|
606
|
-
],
|
607
|
-
language: Annotated[
|
608
|
-
str,
|
609
|
-
typer.Option(
|
610
|
-
"--language",
|
611
|
-
"-l",
|
612
|
-
help="The language of the source code.",
|
613
|
-
click_type=click.Choice(sorted(LANGUAGES)),
|
614
|
-
),
|
615
|
-
],
|
616
|
-
output_dir: Annotated[
|
617
|
-
Path,
|
618
|
-
typer.Option(
|
619
|
-
"--output-dir", "-o", help="The directory to store the partitioned code in."
|
620
|
-
),
|
621
|
-
],
|
622
|
-
llm_name: Annotated[
|
623
|
-
str,
|
624
|
-
typer.Option(
|
625
|
-
"--llm",
|
626
|
-
"-L",
|
627
|
-
help="The custom name of the model set with 'janus llm add'.",
|
628
|
-
),
|
629
|
-
] = "gpt-4o",
|
630
|
-
max_prompts: Annotated[
|
631
|
-
int,
|
632
|
-
typer.Option(
|
633
|
-
"--max-prompts",
|
634
|
-
"-m",
|
635
|
-
help="The maximum number of times to prompt a model on one functional block "
|
636
|
-
"before exiting the application. This is to prevent wasting too much money.",
|
637
|
-
),
|
638
|
-
] = 10,
|
639
|
-
overwrite: Annotated[
|
640
|
-
bool,
|
641
|
-
typer.Option(
|
642
|
-
"--overwrite/--preserve",
|
643
|
-
help="Whether to overwrite existing files in the output directory",
|
644
|
-
),
|
645
|
-
] = False,
|
646
|
-
temperature: Annotated[
|
647
|
-
float,
|
648
|
-
typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
|
649
|
-
] = 0.7,
|
650
|
-
splitter_type: Annotated[
|
651
|
-
str,
|
652
|
-
typer.Option(
|
653
|
-
"-S",
|
654
|
-
"--splitter",
|
655
|
-
help="Name of custom splitter to use",
|
656
|
-
click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
|
657
|
-
),
|
658
|
-
] = "file",
|
659
|
-
refiner_types: Annotated[
|
660
|
-
list[str],
|
661
|
-
typer.Option(
|
662
|
-
"-r",
|
663
|
-
"--refiner",
|
664
|
-
help="List of refiner types to use. Add -r for each refiner to use in\
|
665
|
-
refinement chain",
|
666
|
-
click_type=click.Choice(list(REFINERS.keys())),
|
667
|
-
),
|
668
|
-
] = ["JanusRefiner"],
|
669
|
-
max_tokens: Annotated[
|
670
|
-
int,
|
671
|
-
typer.Option(
|
672
|
-
"--max-tokens",
|
673
|
-
"-M",
|
674
|
-
help="The maximum number of tokens the model will take in. "
|
675
|
-
"If unspecificed, model's default max will be used.",
|
676
|
-
),
|
677
|
-
] = None,
|
678
|
-
partition_token_limit: Annotated[
|
679
|
-
int,
|
680
|
-
typer.Option(
|
681
|
-
"--partition-tokens",
|
682
|
-
"-pt",
|
683
|
-
help="The limit on the number of tokens per partition.",
|
684
|
-
),
|
685
|
-
] = 8192,
|
686
|
-
):
|
687
|
-
refiner_types = [REFINERS[r] for r in refiner_types]
|
688
|
-
model_arguments = dict(temperature=temperature)
|
689
|
-
kwargs = dict(
|
690
|
-
model=llm_name,
|
691
|
-
model_arguments=model_arguments,
|
692
|
-
source_language=language,
|
693
|
-
max_prompts=max_prompts,
|
694
|
-
max_tokens=max_tokens,
|
695
|
-
splitter_type=splitter_type,
|
696
|
-
refiner_types=refiner_types,
|
697
|
-
partition_token_limit=partition_token_limit,
|
698
|
-
)
|
699
|
-
partitioner = Partitioner(**kwargs)
|
700
|
-
partitioner.translate(input_dir, output_dir, overwrite)
|
701
|
-
|
702
|
-
|
703
|
-
@app.command(
    help="Diagram input code using an LLM.",
    no_args_is_help=True,
)
def diagram(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        str,
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    diagram_type: Annotated[
        str,
        typer.Option(
            "--diagram-type", "-dg", help="Diagram type to generate in PLANTUML"
        ),
    ] = "Activity",
    add_documentation: Annotated[
        bool,
        typer.Option(
            "--add-documentation/--no-documentation",
            "-ad",
            help="Whether to use documentation in generation",
        ),
    ] = False,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            help="List of refiner types to use. Add -r for each refiner to use in\
                refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        str,
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
):
    """Generate diagrams for the source code in ``input_dir`` with an LLM.

    Builds a ``DiagramGenerator`` from the CLI options and runs it over the
    input directory, writing results to ``output_dir`` and (optionally) into
    the Chroma DB collection named by ``collection``.
    """
    # Map refiner names (validated by click.Choice above) to their classes.
    refiner_types = [REFINERS[r] for r in refiner_types]
    model_arguments = dict(temperature=temperature)
    # db_loc / collections config come from the module-level `janus db init`
    # state and point the generator at the configured Chroma DB.
    collections_config = get_collections_config()
    diagram_generator = DiagramGenerator(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
        retriever_type=retriever_type,
        diagram_type=diagram_type,
        add_documentation=add_documentation,
    )
    diagram_generator.translate(input_dir, output_dir, overwrite, collection)
|
829
|
-
|
830
|
-
|
831
|
-
@app.command(
    help="LLM self evaluation",
    no_args_is_help=True,
)
def llm_self_eval(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be evaluated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the evaluations in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    evaluation_type: Annotated[
        str,
        typer.Option(
            "--evaluation-type",
            "-e",
            help="Type of output to evaluate.",
            click_type=click.Choice(["incose", "comments"]),
        ),
    ] = "incose",
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        str,
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            help="List of refiner types to use. Add -r for each refiner to use in\
                refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    eval_items_per_request: Annotated[
        int,
        typer.Option(
            "--eval-items-per-request",
            "-rc",
            help="The maximum number of evaluation items per request",
        ),
    ] = None,
    max_tokens: Annotated[
        int,
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
):
    """Run an LLM self-evaluation over previously generated output.

    Dispatches to ``RequirementEvaluator`` (``incose``) or
    ``InlineCommentEvaluator`` (``comments``) based on ``evaluation_type``,
    then runs the chosen evaluator over ``input_dir``, writing results to
    ``output_dir`` and optionally into the Chroma DB ``collection``.

    Raises:
        ValueError: If ``evaluation_type`` is not a recognized value (only
            possible when called directly, since click.Choice restricts the
            CLI values).
    """
    model_arguments = dict(temperature=temperature)
    # Map refiner names (validated by click.Choice above) to their classes.
    refiner_types = [REFINERS[r] for r in refiner_types]
    kwargs = dict(
        eval_items_per_request=eval_items_per_request,
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
    )
    # Setting parser type here
    if evaluation_type == "incose":
        evaluator = RequirementEvaluator(**kwargs)
    elif evaluation_type == "comments":
        evaluator = InlineCommentEvaluator(**kwargs)
    else:
        # Previously fell through and crashed with an opaque NameError on
        # `evaluator`; fail fast with a clear message instead.
        raise ValueError(f"Unknown evaluation type: {evaluation_type}")

    evaluator.translate(input_dir, output_dir, overwrite, collection)
|
962
|
-
|
963
|
-
|
964
|
-
@db.command("init", help="Connect to or create a database.")
def db_init(
    path: Annotated[
        str, typer.Option("--path", "-p", help="The path to the database file.")
    ] = str(janus_dir / "chroma.db"),
    url: Annotated[
        str,
        typer.Option(
            "--url",
            "-u",
            help="The URL of the database if the database is running externally.",
        ),
    ] = "",
) -> None:
    """Point janus at a Chroma DB, creating a local one if needed.

    An explicit ``url`` takes precedence over ``path``; whichever location is
    chosen is persisted to ``db_file`` and cached in the module-level
    ``db_loc`` / ``embedding_db`` globals.
    """
    global db_loc, embedding_db
    if url != "":
        # External server: record the URL as the database location.
        print(f"Pointing to Chroma DB at {url}")
        location = url
    else:
        # Local file: normalize to an absolute path before persisting.
        location = os.path.abspath(path)
        print(f"Setting up Chroma DB at {location}")
    with open(db_file, "w") as f:
        f.write(location)
    db_loc = location
    embedding_db = ChromaEmbeddingDatabase(db_loc)
|
992
|
-
|
993
|
-
|
994
|
-
@db.command("status", help="Print current database location.")
def db_status():
    """Report the Chroma DB location janus is currently configured to use."""
    print(f"Chroma DB currently pointing to {db_loc}")
|
997
|
-
|
998
|
-
|
999
|
-
@db.command(
    "ls",
    help="List the current database's collections. Or supply a collection name to list "
    "information about its contents.",
)
def db_ls(
    collection_name: Annotated[
        Optional[str], typer.Argument(help="The name of the collection.")
    ] = None,
    peek: Annotated[
        Optional[int],
        typer.Option("--peek", "-p", help="Peek at N entries for a specific collection."),
    ] = None,
) -> None:
    """List the current database's collections"""
    # Peeking only makes sense for a single named collection.
    if peek is not None and collection_name is None:
        print(
            "\n[bold red]Cannot peek at all collections. Please specify a "
            "collection by name.[/bold red]"
        )
        return
    db = ChromaEmbeddingDatabase(db_loc)
    collections = Collections(db)
    # With collection_name=None this returns every collection.
    collection_list = collections.get(collection_name)
    for collection in collection_list:
        print(
            f"\n[bold underline]Collection[/bold underline]: "
            f"[bold salmon1]{collection.name}[/bold salmon1]"
        )
        print(f"  ID: {collection.id}")
        print(f"  Metadata: {collection.metadata}")
        print(f"  Tenant: [green]{collection.tenant}[/green]")
        print(f"  Database: [green]{collection.database}[/green]")
        print(f"  Length: {collection.count()}")
        # NOTE(review): truthiness test means `--peek 0` passes the guard
        # above but is silently ignored here — confirm that is intended.
        if peek:
            entry = collection.peek(peek)
            # Truncate the (long) embedding vector for readable output.
            entry["embeddings"] = entry["embeddings"][0][:2] + ["..."]
            if peek == 1:
                print("  [bold]Peeking at first entry[/bold]:")
            else:
                print(f"  [bold]Peeking at first {peek} entries[/bold]:")
            print(entry)
        print()
|
1042
|
-
|
1043
|
-
|
1044
|
-
@db.command("add", help="Add a collection to the current database.")
def db_add(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    model_name: Annotated[str, typer.Argument(help="The name of the embedding model.")],
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be added.",
        ),
    ] = "./",
    input_lang: Annotated[
        str, typer.Option("--language", "-l", help="The language of the source code.")
    ] = "python",
    max_tokens: Annotated[
        int,
        typer.Option(
            "--max-tokens",
            "-m",
            help="The maximum number of tokens for each chunk of input source code.",
        ),
    ] = 4096,
) -> None:
    """Add a collection to the database

    Arguments:
        collection_name: The name of the collection to add
        model_name: The name of the embedding model to use
        input_dir: The directory containing the source code to be added
        input_lang: The language of the source code
        max_tokens: The maximum number of tokens for each chunk of input source code
    """
    # TODO: import factory
    console = Console()

    # True when the collection already existed (we append rather than create).
    added_to = _check_collection(collection_name, input_dir)
    collections_config = get_collections_config()

    with console.status(
        f"Adding collection: [bold salmon]{collection_name}[/bold salmon]",
        spinner="arrow3",
    ):
        vectorizer_factory = ChromaDBVectorizer()
        vectorizer = vectorizer_factory.create_vectorizer(
            path=db_loc, config=collections_config
        )
        vectorizer.get_or_create_collection(collection_name, model_name=model_name)
        input_dir = Path(input_dir)
        suffix = LANGUAGES[input_lang]["suffix"]
        source_glob = f"**/*.{suffix}"
        input_paths = list(input_dir.rglob(source_glob))
        # Choose a splitter: MUMPS and binary need dedicated splitters;
        # everything else goes through tree-sitter.
        if input_lang in CUSTOM_SPLITTERS:
            if input_lang == "mumps":
                splitter = MumpsSplitter(
                    max_tokens=max_tokens,
                )
            elif input_lang == "binary":
                splitter = BinarySplitter(
                    max_tokens=max_tokens,
                )
            else:
                # Fall back to tree-sitter rather than leaving `splitter`
                # unbound for a custom-splitter language with no dedicated
                # class (previously a latent NameError).
                splitter = TreeSitterSplitter(
                    language=input_lang,
                    max_tokens=max_tokens,
                )
        else:
            splitter = TreeSitterSplitter(
                language=input_lang,
                max_tokens=max_tokens,
            )
        for input_path in input_paths:
            input_block = splitter.split(input_path)
            vectorizer.add_nodes_recursively(
                input_block,
                collection_name,
                input_path.name,
            )
    # Count every non-directory file so we can report how many were skipped.
    total_files = len([p for p in input_dir.glob("**/*") if not p.is_dir()])
    if added_to:
        print(
            f"\nAdded to [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f"  Embedding Model: [green]{model_name}[/green]\n"
            f"  Input Directory: {input_dir.absolute()}\n"
            f"  {input_lang.capitalize()} [green]*.{suffix}[/green] Files: "
            f"{len(input_paths)}\n"
            "  Other Files (skipped): "
            f"{total_files - len(input_paths)}\n"
        )
    else:
        print(
            f"\nCreated [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f"  Embedding Model: '{model_name}'\n"
            f"  Input Directory: {input_dir.absolute()}\n"
            f"  {input_lang.capitalize()} [green]*.{suffix}[/green] Files: "
            f"{len(input_paths)}\n"
            "  Other Files (skipped): "
            f"{total_files - len(input_paths)}\n"
        )
    with open(collections_config_file, "w") as f:
        json.dump(vectorizer.config, f, indent=2)
|
1141
|
-
|
1142
|
-
|
1143
|
-
@db.command(
    "rm",
    help="Remove a collection from the database.",
)
def db_rm(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    confirm: Annotated[
        bool,
        typer.Option(
            "--yes",
            "-y",
            help="Confirm the removal of the collection.",
        ),
    ],
) -> None:
    """Remove a collection from the database

    Arguments:
        collection_name: The name of the collection to remove
    """
    # With --yes we skip the interactive prompt; otherwise ask first.
    delete = confirm or Confirm.ask(
        f"\nAre you sure you want to [bold red]remove[/bold red] "
        f"[bold salmon1]{collection_name}[/bold salmon1]?",
    )
    if not delete:
        raise typer.Abort()
    collections = Collections(ChromaEmbeddingDatabase(db_loc))
    collections.delete(collection_name)
    print(
        f"[bold red]Removed[/bold red] collection "
        f"[bold salmon1]{collection_name}[/bold salmon1]"
    )
|
1179
|
-
|
1180
|
-
|
1181
|
-
def _check_collection(collection_name: str, input_dir: str | Path) -> bool:
    """Return True when ``collection_name`` already exists in the Chroma DB.

    ``Collections.get`` raises ValueError for a missing collection; that is
    mapped to False so callers can distinguish "append" from "create".
    ``input_dir`` is currently unused (kept for interface stability).
    """
    collections = Collections(ChromaEmbeddingDatabase(db_loc))
    try:
        collections.get(collection_name)
    except ValueError:
        return False
    return True
|
1198
|
-
|
1199
|
-
|
1200
|
-
@llm.command("add", help="Add a model config to janus")
def llm_add(
    model_name: Annotated[
        str, typer.Argument(help="The user's custom name of the model")
    ],
    model_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="The type of the model",
            click_type=click.Choice(sorted(list(MODEL_TYPE_CONSTRUCTORS.keys()))),
        ),
    ] = "Azure",
):
    """Interactively build a model config and save it as JSON.

    Each branch prompts for the provider-specific settings, assembles a
    ``cfg`` dict (model_type, args, token limit, cost), and writes it to
    ``MODEL_CONFIG_DIR/<model_name>.json``.
    """
    if not MODEL_CONFIG_DIR.exists():
        MODEL_CONFIG_DIR.mkdir(parents=True)
    model_cfg = MODEL_CONFIG_DIR / f"{model_name}.json"
    if model_type == "HuggingFace":
        # Remote HuggingFace inference endpoint: prompt for URL and costs.
        url = typer.prompt("Enter the model's URL")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=4096, type=int
        )
        in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
        out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
        params = dict(
            inference_server_url=url,
            max_new_tokens=max_tokens,
            top_k=10,
            top_p=0.95,
            typical_p=0.95,
            temperature=0.01,
            repetition_penalty=1.03,
            timeout=240,
        )
        cfg = {
            "model_type": model_type,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": {"input": in_cost, "output": out_cost},
        }
    elif model_type == "HuggingFaceLocal":
        # Locally hosted HuggingFace pipeline: no per-token cost.
        model_id = typer.prompt("Enter the model ID")
        task = typer.prompt("Enter the task")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=4096, type=int
        )
        in_cost = 0
        out_cost = 0
        params = {"model_id": model_id, "task": task}
        cfg = {
            "model_type": model_type,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": {"input": in_cost, "output": out_cost},
        }
    elif model_type == "OpenAI":
        print("DEPRECATED: Use 'Azure' instead. CTRL+C to exit.")
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="gpt-4o",
            type=click.Choice(openai_models),
            show_choices=False,
        )
        # NOTE(review): this branch builds params and looks up TOKEN_LIMITS /
        # COST_PER_1K_TOKENS by the user's custom `model_name`, while the
        # Azure/Bedrock branches use `model_id` (via MODEL_ID_TO_LONG_ID).
        # Looks like a KeyError for arbitrary custom names — verify.
        params = dict(
            model_name=model_name,
            temperature=0.7,
            n=1,
        )
        max_tokens = TOKEN_LIMITS[model_name]
        model_cost = COST_PER_1K_TOKENS[model_name]
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": model_cost,
        }
    elif model_type == "Azure":
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="gpt-4o",
            type=click.Choice(azure_models),
            show_choices=False,
        )
        params = dict(
            # Azure uses the "azure_deployment" key for what we're calling "long_model_id"
            azure_deployment=MODEL_ID_TO_LONG_ID[model_id],
            temperature=0.7,
            n=1,
        )
        max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
        model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": model_cost,
        }
    elif model_type == "BedrockChat" or model_type == "Bedrock":
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="bedrock-claude-sonnet",
            type=click.Choice(bedrock_models),
            show_choices=False,
        )
        params = dict(
            # Bedrock uses the "model_id" key for what we're calling "long_model_id"
            model_id=MODEL_ID_TO_LONG_ID[model_id],
            model_kwargs={"temperature": 0.7},
        )
        max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
        model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": model_cost,
        }
    else:
        raise ValueError(f"Unknown model type {model_type}")
    with open(model_cfg, "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Model config written to {model_cfg}")
|
1326
|
-
|
1327
|
-
|
1328
|
-
@llm.command("ls", help="List all of the user-configured models")
def llm_ls(
    all: Annotated[
        bool,
        typer.Option(
            "--all",
            "-a",
            is_flag=True,
            help="List all models, including the default model IDs.",
        ),
    ] = False,
):
    """List user-configured models and, with ``--all``, the built-in IDs.

    Note: the original option carried a stray ``click_type=click.Choice(...)``
    (copy-pasted from `llm add`) on this boolean flag; a flag takes no choice
    values, so it has been removed.
    """
    print("\n[green]User-configured models[/green]:")
    for model_cfg in MODEL_CONFIG_DIR.glob("*.json"):
        with open(model_cfg, "r") as f:
            cfg = json.load(f)
        print(f"\t[blue]{model_cfg.stem}[/blue]: [purple]{cfg['model_type']}[/purple]")

    if all:
        print("\n[green]Available model IDs[/green]:")
        for model_id, model_type in MODEL_TYPES.items():
            print(f"\t[blue]{model_id}[/blue]: [purple]{model_type}[/purple]")
|
1351
|
-
|
1352
|
-
|
1353
|
-
@embedding.command("add", help="Add an embedding model config to janus")
def embedding_add(
    model_name: Annotated[
        str, typer.Argument(help="The user's custom name for the model")
    ],
    model_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="The type of the model",
            click_type=click.Choice(list(val.value for val in EmbeddingModelType)),
        ),
    ] = "OpenAI",
):
    """Interactively build an embedding-model config and save it as JSON.

    Each branch prompts for provider-specific settings, assembles a ``cfg``
    dict (type, identifier, args, token limit, cost), and writes it to
    ``EMBEDDING_MODEL_CONFIG_DIR/<model_name>.json``.
    """
    if not EMBEDDING_MODEL_CONFIG_DIR.exists():
        EMBEDDING_MODEL_CONFIG_DIR.mkdir(parents=True)
    # Computed before any branch may rebind `model_name` (the OpenAI branch
    # reuses the variable for the provider model name).
    model_cfg = EMBEDDING_MODEL_CONFIG_DIR / f"{model_name}.json"
    if model_type in EmbeddingModelType.HuggingFaceInferenceAPI.values:
        hf = typer.style("HuggingFaceInferenceAPI", fg="yellow")
        # value_proc=AnyHttpUrl validates the URL as it is entered.
        url = typer.prompt(f"Enter the {hf} model's URL", type=str, value_proc=AnyHttpUrl)
        api_model_name = typer.prompt("Enter the model's name", type=str, default="")
        api_key = typer.prompt("Enter the API key", type=str, default="")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=8191, type=int
        )
        in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
        out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
        params = dict(
            model_name=api_model_name,
            api_key=api_key,
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": str(url),
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": {"input": in_cost, "output": out_cost},
        }
    elif model_type in EmbeddingModelType.HuggingFaceLocal.values:
        hf = typer.style("HuggingFace", fg="yellow")
        model_id = typer.prompt(
            f"Enter the {hf} model ID",
            default="sentence-transformers/all-MiniLM-L6-v2",
            type=str,
        )
        # Normalize the prompted path through Path before storing it.
        cache_folder = str(
            Path(
                typer.prompt(
                    "Enter the model's cache folder",
                    default=EMBEDDING_MODEL_CONFIG_DIR / "cache",
                    type=str,
                )
            )
        )
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=8191, type=int
        )
        # NOTE(review): cache_folder is already a str; the inner str() here
        # is redundant but harmless.
        params = dict(
            cache_folder=str(cache_folder),
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": model_id,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": {"input": 0, "output": 0},
        }
    elif model_type in EmbeddingModelType.OpenAI.values:
        available_models = list(EMBEDDING_COST_PER_MODEL.keys())

        open_ai = typer.style("OpenAI", fg="green")
        prompt = f"Enter the {open_ai} model name"

        # Rebinds `model_name` to the OpenAI model; the config filename was
        # already derived from the user's custom name above.
        model_name = typer.prompt(
            prompt,
            default="text-embedding-3-small",
            type=click.types.Choice(available_models),
            show_choices=False,
        )
        params = dict(
            model=model_name,
        )
        max_tokens = EMBEDDING_TOKEN_LIMITS[model_name]
        model_cost = EMBEDDING_COST_PER_MODEL[model_name]
        cfg = {
            "model_type": model_type,
            "model_identifier": model_name,
            "model_args": params,
            "token_limit": max_tokens,
            "model_cost": model_cost,
        }
    else:
        raise ValueError(f"Unknown model type {model_type}")
    with open(model_cfg, "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Model config written to {model_cfg}")
|
1450
|
-
|
1451
|
-
|
1452
|
-
# Register the sub-command groups on the top-level Typer app.
app.add_typer(db, name="db")
app.add_typer(llm, name="llm")
app.add_typer(evaluate, name="evaluate")
app.add_typer(embedding, name="embedding")
|
1456
|
-
|
1457
|
-
|
1458
|
-
@app.command()
def render(
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
            help="Directory containing JSON outputs to render.",
        ),
    ],
    output_dir: Annotated[
        str, typer.Option("--output", "-o", help="Directory for rendered diagrams.")
    ],
):
    """Render JSON diagram outputs to images with PlantUML.

    For every ``*.json`` under ``input_dir``, extracts the ``output`` field,
    writes it to a temporary ``.txt`` mirror under ``output_dir``, runs the
    bundled PlantUML jar on it, and removes the intermediate text file.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    # Loop-invariant: resolve the PlantUML jar once, not per file.
    jar_path = homedir / ".janus/lib/plantuml.jar"
    for input_file in input_dir.rglob("*.json"):
        with open(input_file, "r") as f:
            data = json.load(f)

        output_file = output_dir / input_file.relative_to(input_dir).with_suffix(".txt")
        # parents=True/exist_ok=True: the original bare mkdir() failed when
        # more than one directory level was missing (or on a race).
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # Stored output embeds literal "\n" sequences; expand to real newlines.
        text = data["output"].replace("\\n", "\n").strip()
        output_file.write_text(text)

        subprocess.run(["java", "-jar", jar_path, output_file])  # nosec
        output_file.unlink()
|
1485
|
-
|
1486
|
-
|
1487
|
-
# Script entry point: launch the Typer CLI when run directly.
if __name__ == "__main__":
    app()
|