janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +1 -1
- janus/__main__.py +1 -1
- janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
- janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
- janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
- janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
- janus/_tests/test_cli.py +3 -2
- janus/cli/aggregate.py +135 -0
- janus/cli/cli.py +111 -0
- janus/cli/constants.py +43 -0
- janus/cli/database.py +289 -0
- janus/cli/diagram.py +178 -0
- janus/cli/document.py +174 -0
- janus/cli/embedding.py +122 -0
- janus/cli/llm.py +187 -0
- janus/cli/partition.py +125 -0
- janus/cli/self_eval.py +149 -0
- janus/cli/translate.py +183 -0
- janus/converter/__init__.py +1 -1
- janus/converter/_tests/test_translate.py +2 -0
- janus/converter/converter.py +129 -92
- janus/converter/document.py +21 -14
- janus/converter/evaluate.py +237 -4
- janus/converter/translate.py +3 -3
- janus/embedding/collections.py +1 -1
- janus/language/alc/_tests/alc.asm +3779 -0
- janus/language/alc/_tests/test_alc.py +1 -1
- janus/language/alc/alc.py +9 -4
- janus/language/binary/_tests/hello.bin +0 -0
- janus/language/block.py +47 -12
- janus/language/file.py +1 -1
- janus/language/mumps/_tests/mumps.m +235 -0
- janus/language/splitter.py +31 -23
- janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
- janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
- janus/language/treesitter/_tests/languages/matlab.m +225 -0
- janus/language/treesitter/treesitter.py +9 -1
- janus/llm/models_info.py +26 -13
- janus/metrics/_tests/asm_test_file.asm +10 -0
- janus/metrics/_tests/mumps_test_file.m +6 -0
- janus/metrics/_tests/test_treesitter_metrics.py +1 -1
- janus/metrics/prompts/clarity.txt +8 -0
- janus/metrics/prompts/completeness.txt +16 -0
- janus/metrics/prompts/faithfulness.txt +10 -0
- janus/metrics/prompts/hallucination.txt +16 -0
- janus/metrics/prompts/quality.txt +8 -0
- janus/metrics/prompts/readability.txt +16 -0
- janus/metrics/prompts/usefulness.txt +16 -0
- janus/parsers/code_parser.py +4 -4
- janus/parsers/doc_parser.py +12 -9
- janus/parsers/eval_parsers/incose_parser.py +134 -0
- janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
- janus/parsers/parser.py +7 -0
- janus/parsers/partition_parser.py +47 -13
- janus/parsers/reqs_parser.py +8 -5
- janus/parsers/uml.py +5 -4
- janus/prompts/prompt.py +2 -2
- janus/prompts/templates/README.md +30 -0
- janus/prompts/templates/basic_aggregation/human.txt +6 -0
- janus/prompts/templates/basic_aggregation/system.txt +1 -0
- janus/prompts/templates/basic_refinement/human.txt +14 -0
- janus/prompts/templates/basic_refinement/system.txt +1 -0
- janus/prompts/templates/diagram/human.txt +9 -0
- janus/prompts/templates/diagram/system.txt +1 -0
- janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
- janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
- janus/prompts/templates/document/human.txt +10 -0
- janus/prompts/templates/document/system.txt +1 -0
- janus/prompts/templates/document_cloze/human.txt +11 -0
- janus/prompts/templates/document_cloze/system.txt +1 -0
- janus/prompts/templates/document_cloze/variables.json +4 -0
- janus/prompts/templates/document_cloze/variables_asm.json +4 -0
- janus/prompts/templates/document_inline/human.txt +13 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
- janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
- janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
- janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
- janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
- janus/prompts/templates/multidocument/human.txt +15 -0
- janus/prompts/templates/multidocument/system.txt +1 -0
- janus/prompts/templates/partition/human.txt +22 -0
- janus/prompts/templates/partition/system.txt +1 -0
- janus/prompts/templates/partition/variables.json +4 -0
- janus/prompts/templates/pseudocode/human.txt +7 -0
- janus/prompts/templates/pseudocode/system.txt +7 -0
- janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
- janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
- janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
- janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
- janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
- janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
- janus/prompts/templates/refinement/hallucination/human.txt +13 -0
- janus/prompts/templates/refinement/hallucination/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/human.txt +15 -0
- janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
- janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/system.txt +1 -0
- janus/prompts/templates/refinement/revision/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/revision/system.txt +1 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
- janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
- janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
- janus/prompts/templates/requirements/human.txt +13 -0
- janus/prompts/templates/requirements/system.txt +2 -0
- janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
- janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
- janus/prompts/templates/simple/human.txt +16 -0
- janus/prompts/templates/simple/system.txt +3 -0
- janus/refiners/format.py +49 -0
- janus/refiners/refiner.py +143 -4
- janus/utils/enums.py +140 -111
- janus/utils/logger.py +2 -0
- {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/METADATA +7 -7
- janus_llm-4.3.5.dist-info/RECORD +210 -0
- {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/WHEEL +1 -1
- janus_llm-4.3.5.dist-info/entry_points.txt +3 -0
- janus/cli.py +0 -1343
- janus_llm-4.2.0.dist-info/RECORD +0 -113
- janus_llm-4.2.0.dist-info/entry_points.txt +0 -3
- {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/LICENSE +0 -0
janus/cli/database.py
ADDED
@@ -0,0 +1,289 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import typer
|
5
|
+
from typing_extensions import Annotated
|
6
|
+
|
7
|
+
from janus.cli.constants import db_loc, janus_dir
|
8
|
+
|
9
|
+
# Typer sub-application that groups the database commands defined in this module.
# Running it with no arguments shows the help text; both -h and --help are accepted.
db = typer.Typer(
    context_settings={"help_option_names": ["-h", "--help"]},
    no_args_is_help=True,
    add_completion=False,
    help="Database commands",
)
|
15
|
+
|
16
|
+
|
17
|
+
@db.command("init", help="Connect to or create a database.")
def db_init(
    path: Annotated[
        str, typer.Option("--path", "-p", help="The path to the database file.")
    ] = str(janus_dir / "chroma.db"),
    url: Annotated[
        str,
        typer.Option(
            "--url",
            "-u",
            help="The URL of the database if the database is running externally.",
        ),
    ] = "",
) -> None:
    """Record the Chroma DB location and open a connection to it

    A non-empty `url` takes precedence over `path`. The chosen location is written
    to the `db_file` marker file, stored in the module-level `db_loc`, and used to
    build the module-level `embedding_db`.

    Arguments:
        path: Filesystem path of the database; converted to an absolute path
        url: URL of an externally running database; used verbatim when non-empty
    """
    import os

    from rich import print

    from janus.cli.constants import db_file
    from janus.embedding.database import ChromaEmbeddingDatabase

    global db_loc, embedding_db

    # Decide which location wins and announce it before anything is written.
    if url != "":
        location = url
        print(f"Pointing to Chroma DB at {location}")
    else:
        location = os.path.abspath(path)
        print(f"Setting up Chroma DB at {location}")

    # Persist the choice in the marker file, then update this module's state.
    with open(db_file, "w") as handle:
        handle.write(location)
    db_loc = location
    embedding_db = ChromaEmbeddingDatabase(db_loc)
|
52
|
+
|
53
|
+
|
54
|
+
@db.command("status", help="Print current database location.")
def db_status():
    """Print the database location currently held in the module-level `db_loc`"""
    from rich import print as rich_print

    rich_print(f"Chroma DB currently pointing to {db_loc}")
|
59
|
+
|
60
|
+
|
61
|
+
@db.command(
    "ls",
    help="List the current database's collections. Or supply a collection name to list "
    "information about its contents.",
)
def db_ls(
    collection_name: Annotated[
        Optional[str], typer.Argument(help="The name of the collection.")
    ] = None,
    peek: Annotated[
        Optional[int],
        typer.Option("--peek", "-p", help="Peek at N entries for a specific collection."),
    ] = None,
) -> None:
    """Print a summary of the collections stored in the current database

    Arguments:
        collection_name: Passed to `Collections.get` to select what is listed;
            None when the argument is omitted
        peek: Number of entries to preview; only allowed together with
            `collection_name`
    """
    from rich import print

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    # Peeking needs one named collection to look into.
    if collection_name is None and peek is not None:
        print(
            "\n[bold red]Cannot peek at all collections. Please specify a "
            "collection by name.[/bold red]"
        )
        return

    store = Collections(ChromaEmbeddingDatabase(db_loc))
    for collection in store.get(collection_name):
        print(
            f"\n[bold underline]Collection[/bold underline]: "
            f"[bold salmon1]{collection.name}[/bold salmon1]"
        )
        print(f" ID: {collection.id}")
        print(f" Metadata: {collection.metadata}")
        print(f" Tenant: [green]{collection.tenant}[/green]")
        print(f" Database: [green]{collection.database}[/green]")
        print(f" Length: {collection.count()}")
        if peek:
            preview = collection.peek(peek)
            # Show only the first two components of the first embedding so the
            # output stays readable.
            preview["embeddings"] = preview["embeddings"][0][:2] + ["..."]
            if peek == 1:
                print(" [bold]Peeking at first entry[/bold]:")
            else:
                print(f" [bold]Peeking at first {peek} entries[/bold]:")
            print(preview)
        print()
|
110
|
+
|
111
|
+
|
112
|
+
@db.command("add", help="Add a collection to the current database.")
def db_add(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    model_name: Annotated[str, typer.Argument(help="The name of the embedding model.")],
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be added.",
        ),
    ] = "./",
    input_lang: Annotated[
        str, typer.Option("--language", "-l", help="The language of the source code.")
    ] = "python",
    max_tokens: Annotated[
        int,
        typer.Option(
            "--max-tokens",
            "-m",
            help="The maximum number of tokens for each chunk of input source code.",
        ),
    ] = 4096,
) -> None:
    """Add a collection to the database

    Every file under `input_dir` whose suffix is registered for `input_lang` is
    split and added to the collection. A summary is then printed and the
    vectorizer's config is written to the collections config file.

    Arguments:
        collection_name: The name of the collection to add
        model_name: The name of the embedding model to use
        input_dir: The directory containing the source code to be added
        input_lang: The language of the source code
        max_tokens: The maximum number of tokens for each chunk of input source code
    """
    # TODO: import factory
    import json
    from pathlib import Path

    from rich.console import Console

    from janus.cli.constants import collections_config_file, get_collections_config
    from janus.embedding.vectorize import ChromaDBVectorizer
    from janus.language.binary import BinarySplitter
    from janus.language.mumps import MumpsSplitter
    from janus.language.naive.registry import CUSTOM_SPLITTERS
    from janus.language.treesitter import TreeSitterSplitter
    from janus.utils.enums import LANGUAGES

    console = Console()

    # Determine up front whether the collection already exists; this only
    # changes the wording of the summary below.
    added_to = _check_collection(collection_name, input_dir)
    collections_config = get_collections_config()

    with console.status(
        # "salmon1" matches the colour name used everywhere else in this module.
        f"Adding collection: [bold salmon1]{collection_name}[/bold salmon1]",
        spinner="arrow3",
    ):
        vectorizer_factory = ChromaDBVectorizer()
        vectorizer = vectorizer_factory.create_vectorizer(
            path=db_loc, config=collections_config
        )
        vectorizer.get_or_create_collection(collection_name, model_name=model_name)
        input_dir = Path(input_dir)
        suffixes = [f".{ext}" for ext in LANGUAGES[input_lang]["suffixes"]]
        input_paths = [file for ext in suffixes for file in input_dir.rglob(f"**/*{ext}")]

        # Flat chain so `splitter` is always bound: the dedicated splitters are
        # used exactly when they were before, and every other language falls
        # back to tree-sitter instead of hitting an UnboundLocalError.
        uses_custom_splitter = input_lang in CUSTOM_SPLITTERS
        if uses_custom_splitter and input_lang == "mumps":
            splitter = MumpsSplitter(
                max_tokens=max_tokens,
            )
        elif uses_custom_splitter and input_lang == "binary":
            splitter = BinarySplitter(
                max_tokens=max_tokens,
            )
        else:
            splitter = TreeSitterSplitter(
                language=input_lang,
                max_tokens=max_tokens,
            )
        for input_path in input_paths:
            input_block = splitter.split(input_path)
            vectorizer.add_nodes_recursively(
                input_block,
                collection_name,
                input_path.name,
            )

    # Count every regular file so the summary can report how many were skipped.
    total_files = sum(1 for entry in input_dir.glob("**/*") if not entry.is_dir())

    # The summary contains Rich markup, so it must go through the Rich console;
    # the builtin print would emit the [bold ...] tags literally.
    if added_to:
        console.print(
            f"\nAdded to [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f" Embedding Model: [green]{model_name}[/green]\n"
            f" Input Directory: {input_dir.absolute()}\n"
            f" {input_lang} [green]{suffixes}[/green] Files: "
            f"{len(input_paths)}\n"
            " Other Files (skipped): "
            f"{total_files - len(input_paths)}\n"
        )
    else:
        console.print(
            f"\nCreated [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f" Embedding Model: '{model_name}'\n"
            f" Input Directory: {input_dir.absolute()}\n"
            f" {input_lang} [green]{suffixes}[/green] Files: "
            f"{len(input_paths)}\n"
            " Other Files (skipped): "
            f"{total_files - len(input_paths)}\n"
        )
    with open(collections_config_file, "w") as f:
        json.dump(vectorizer.config, f, indent=2)
|
223
|
+
|
224
|
+
|
225
|
+
@db.command(
    "rm",
    help="Remove a collection from the database.",
)
def db_rm(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    confirm: Annotated[
        bool,
        typer.Option(
            "--yes",
            "-y",
            help="Confirm the removal of the collection.",
        ),
    ] = False,
) -> None:
    """Remove a collection from the database

    Arguments:
        collection_name: The name of the collection to remove
        confirm: Skip the interactive confirmation prompt when True. Defaults to
            False so that omitting --yes asks for confirmation instead of being
            rejected as a missing required option.

    Raises:
        typer.Abort: If the user declines the confirmation prompt
    """
    # Rich's print is needed for the markup in the final message; this module
    # has no module-level rich import.
    from rich import print
    from rich.prompt import Confirm

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    # Only prompt when the caller has not already confirmed with --yes.
    if not confirm:
        delete = Confirm.ask(
            f"\nAre you sure you want to [bold red]remove[/bold red] "
            f"[bold salmon1]{collection_name}[/bold salmon1]?",
        )
        if not delete:
            raise typer.Abort()

    database = ChromaEmbeddingDatabase(db_loc)
    collections = Collections(database)
    collections.delete(collection_name)
    print(
        f"[bold red]Removed[/bold red] collection "
        f"[bold salmon1]{collection_name}[/bold salmon1]"
    )
|
266
|
+
|
267
|
+
|
268
|
+
def _check_collection(collection_name: str, input_dir: str | Path) -> bool:
    """Report whether a collection with the given name already exists

    Arguments:
        collection_name: The name of the collection to look up
        input_dir: Currently unused; an interactive "update existing collection?"
            prompt that would have referenced it is disabled

    Returns:
        True if looking up the collection succeeds, False if the lookup raises
        `InvalidCollectionException`
    """
    from chromadb.errors import InvalidCollectionException

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    registry = Collections(ChromaEmbeddingDatabase(db_loc))
    try:
        registry.get(collection_name)
    except InvalidCollectionException:
        # The lookup failed, so the caller will be creating a new collection.
        return False
    return True
|
janus/cli/diagram.py
ADDED
@@ -0,0 +1,178 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
|
12
|
+
|
13
|
+
def diagram(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    diagram_type: Annotated[
        str,
        typer.Option(
            "--diagram-type", "-dg", help="Diagram type to generate in PLANTUML"
        ),
    ] = "Activity",
    add_documentation: Annotated[
        bool,
        typer.Option(
            "--add-documentation/--no-documentation",
            "-ad",
            help="Whether to use documentation in generation",
        ),
    ] = False,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Adjacent literals instead of a backslash continuation inside the
            # string, which would fuse the words or embed the indentation.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
):
    """Generate diagrams for the source files in a directory

    Builds a `DiagramGenerator` from the options and calls its `translate`
    method.

    Arguments:
        input_dir: The directory containing the source code
        language: The language of the source code
        output_dir: The directory to store the generated output in
        llm_name: The custom name of the model set with 'janus llm add'
        failure_dir: The directory to store failure files in, if any
        max_prompts: The maximum number of prompts per functional block
        overwrite: Whether to overwrite existing files in the output directory
        temperature: Sampling temperature passed to the model
        collection: Name of a Chroma DB collection to store results in, if any
        diagram_type: Diagram type to generate in PLANTUML
        add_documentation: Whether to use documentation in generation
        splitter_type: Name of the custom splitter to use
        refiner_types: Names of refiners, looked up in `REFINERS`
        retriever_type: Name of the custom retriever to use, if any
    """
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.diagram import DiagramGenerator

    # Map refiner names to their REFINERS entries. The name is rebound, so the
    # list used as the parameter default is never mutated.
    refiner_types = [REFINERS[r] for r in refiner_types]
    model_arguments = dict(temperature=temperature)
    collections_config = get_collections_config()
    diagram_generator = DiagramGenerator(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
        retriever_type=retriever_type,
        diagram_type=diagram_type,
        add_documentation=add_documentation,
    )
    diagram_generator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|
146
|
+
|
147
|
+
|
148
|
+
def render(
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
        ),
    ],
    output_dir: Annotated[str, typer.Option("--output", "-o")],
):
    """Render the PlantUML text stored in JSON files with the PlantUML jar

    For every `*.json` file found recursively under `input_dir`, the "output"
    field is written to a `.txt` file at the same relative path under
    `output_dir`. `java -jar <home>/.janus/lib/plantuml.jar` is run on that
    file, and the `.txt` file is then deleted.

    Arguments:
        input_dir: Directory searched recursively for JSON files
        output_dir: Directory that mirrors the input tree for the rendered output
    """
    import json
    import subprocess  # nosec

    from janus.cli.constants import homedir

    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    # The jar location does not depend on the file being rendered.
    jar_path = homedir / ".janus/lib/plantuml.jar"

    for input_file in input_dir.rglob("*.json"):
        with open(input_file, "r") as f:
            data = json.load(f)

        output_file = output_dir / input_file.relative_to(input_dir).with_suffix(".txt")
        # rglob can return files several levels deep, so create every missing
        # ancestor; a bare mkdir() fails when more than one level is absent.
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # Stored output carries literal backslash-n sequences; turn them into
        # real newlines before handing the text to PlantUML.
        text = data["output"].replace("\\n", "\n").strip()
        output_file.write_text(text)

        subprocess.run(["java", "-jar", jar_path, output_file])  # nosec
        # The .txt file is only an intermediate input for PlantUML.
        output_file.unlink()
|
janus/cli/document.py
ADDED
@@ -0,0 +1,174 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
|
12
|
+
|
13
|
+
def document(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during documentation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    doc_mode: Annotated[
        str,
        typer.Option(
            "--doc-mode",
            "-d",
            help="The documentation mode.",
            click_type=click.Choice(["cloze", "summary", "multidoc", "requirements"]),
        ),
    ] = "cloze",
    comments_per_request: Annotated[
        Optional[int],
        typer.Option(
            "--comments-per-request",
            "-rc",
            help="The maximum number of comments to generate per request when using "
            "Cloze documentation mode.",
        ),
    ] = None,
    drop_comments: Annotated[
        bool,
        typer.Option(
            "--drop-comments/--keep-comments",
            help="Whether to drop or keep comments in the code sent to the LLM",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Adjacent literals instead of a backslash continuation inside the
            # string, which would fuse the words or embed the indentation.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecificed, model's default max will be used.",
        ),
    ] = None,
):
    """Generate documentation for the source files in a directory

    Selects a documenter class from `doc_mode`, constructs it with the shared
    options, and calls its `translate` method. `comments_per_request` is passed
    only to `ClozeDocumenter`; `drop_comments` is passed to the other three.

    Arguments:
        input_dir: The directory containing the source code
        language: The language of the source code
        output_dir: The directory to store the generated output in
        llm_name: The custom name of the model set with 'janus llm add'
        failure_dir: The directory to store failure files in, if any
        max_prompts: The maximum number of prompts per functional block
        overwrite: Whether to overwrite existing files in the output directory
        doc_mode: One of "cloze", "summary", "multidoc" or "requirements"
        comments_per_request: Maximum comments per request in cloze mode
        drop_comments: Whether to drop comments in the code sent to the LLM
        temperature: Sampling temperature passed to the model
        collection: Name of a Chroma DB collection to store results in, if any
        splitter_type: Name of the custom splitter to use
        refiner_types: Names of refiners, looked up in `REFINERS`
        retriever_type: Name of the custom retriever to use, if any
        max_tokens: Maximum number of tokens the model will take in, if set
    """
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
    from janus.converter.requirements import RequirementsDocumenter

    # Map refiner names to their REFINERS entries. The name is rebound, so the
    # list used as the parameter default is never mutated.
    refiner_types = [REFINERS[r] for r in refiner_types]
    model_arguments = dict(temperature=temperature)
    collections_config = get_collections_config()
    # Options common to every documenter class.
    kwargs = dict(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
        retriever_type=retriever_type,
    )
    if doc_mode == "cloze":
        documenter = ClozeDocumenter(comments_per_request=comments_per_request, **kwargs)
    elif doc_mode == "multidoc":
        documenter = MultiDocumenter(drop_comments=drop_comments, **kwargs)
    elif doc_mode == "requirements":
        documenter = RequirementsDocumenter(drop_comments=drop_comments, **kwargs)
    else:
        # "summary" is the only remaining choice and uses the base Documenter.
        documenter = Documenter(drop_comments=drop_comments, **kwargs)

    documenter.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|