janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/__main__.py +1 -1
- janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
- janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
- janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
- janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
- janus/_tests/test_cli.py +3 -2
- janus/cli/aggregate.py +135 -0
- janus/cli/cli.py +117 -0
- janus/cli/constants.py +49 -0
- janus/cli/database.py +289 -0
- janus/cli/diagram.py +207 -0
- janus/cli/document.py +183 -0
- janus/cli/embedding.py +122 -0
- janus/cli/llm.py +191 -0
- janus/cli/partition.py +134 -0
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +147 -0
- janus/cli/translate.py +192 -0
- janus/converter/__init__.py +1 -1
- janus/converter/_tests/test_translate.py +7 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +444 -153
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +27 -16
- janus/converter/evaluate.py +143 -144
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +3 -59
- janus/embedding/collections.py +1 -1
- janus/language/alc/_tests/alc.asm +3779 -0
- janus/language/binary/_tests/hello.bin +0 -0
- janus/language/block.py +78 -14
- janus/language/file.py +1 -1
- janus/language/mumps/_tests/mumps.m +235 -0
- janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
- janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
- janus/language/treesitter/_tests/languages/matlab.m +225 -0
- janus/llm/models_info.py +9 -1
- janus/metrics/_tests/asm_test_file.asm +10 -0
- janus/metrics/_tests/mumps_test_file.m +6 -0
- janus/metrics/_tests/test_treesitter_metrics.py +1 -1
- janus/metrics/metric.py +47 -124
- janus/metrics/prompts/clarity.txt +8 -0
- janus/metrics/prompts/completeness.txt +16 -0
- janus/metrics/prompts/faithfulness.txt +10 -0
- janus/metrics/prompts/hallucination.txt +16 -0
- janus/metrics/prompts/quality.txt +8 -0
- janus/metrics/prompts/readability.txt +16 -0
- janus/metrics/prompts/usefulness.txt +16 -0
- janus/parsers/code_parser.py +4 -4
- janus/parsers/doc_parser.py +12 -9
- janus/parsers/parser.py +7 -0
- janus/parsers/partition_parser.py +6 -4
- janus/parsers/reqs_parser.py +11 -8
- janus/parsers/uml.py +5 -4
- janus/prompts/prompt.py +2 -2
- janus/prompts/templates/README.md +30 -0
- janus/prompts/templates/basic_aggregation/human.txt +6 -0
- janus/prompts/templates/basic_aggregation/system.txt +1 -0
- janus/prompts/templates/basic_refinement/human.txt +14 -0
- janus/prompts/templates/basic_refinement/system.txt +1 -0
- janus/prompts/templates/diagram/human.txt +9 -0
- janus/prompts/templates/diagram/system.txt +1 -0
- janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
- janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
- janus/prompts/templates/document/human.txt +10 -0
- janus/prompts/templates/document/system.txt +1 -0
- janus/prompts/templates/document_cloze/human.txt +11 -0
- janus/prompts/templates/document_cloze/system.txt +1 -0
- janus/prompts/templates/document_cloze/variables.json +4 -0
- janus/prompts/templates/document_cloze/variables_asm.json +4 -0
- janus/prompts/templates/document_inline/human.txt +13 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
- janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
- janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
- janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
- janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
- janus/prompts/templates/multidocument/human.txt +15 -0
- janus/prompts/templates/multidocument/system.txt +1 -0
- janus/prompts/templates/partition/human.txt +22 -0
- janus/prompts/templates/partition/system.txt +1 -0
- janus/prompts/templates/partition/variables.json +4 -0
- janus/prompts/templates/pseudocode/human.txt +7 -0
- janus/prompts/templates/pseudocode/system.txt +7 -0
- janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
- janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
- janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
- janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
- janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
- janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
- janus/prompts/templates/refinement/hallucination/human.txt +13 -0
- janus/prompts/templates/refinement/hallucination/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/human.txt +15 -0
- janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
- janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/system.txt +1 -0
- janus/prompts/templates/refinement/revision/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/revision/system.txt +1 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
- janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
- janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
- janus/prompts/templates/requirements/human.txt +13 -0
- janus/prompts/templates/requirements/system.txt +2 -0
- janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
- janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
- janus/prompts/templates/simple/human.txt +16 -0
- janus/prompts/templates/simple/system.txt +3 -0
- janus/refiners/format.py +49 -0
- janus/refiners/refiner.py +113 -4
- janus/utils/enums.py +127 -112
- janus/utils/logger.py +2 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
- janus_llm-4.4.5.dist-info/RECORD +210 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
- janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
- janus/cli.py +0 -1488
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- janus_llm-4.3.1.dist-info/RECORD +0 -115
- janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/cli/database.py
ADDED
@@ -0,0 +1,289 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import typer
|
5
|
+
from typing_extensions import Annotated
|
6
|
+
|
7
|
+
from janus.cli.constants import db_loc, janus_dir
|
8
|
+
|
9
|
+
# Typer sub-application that groups the database commands defined in this module.
db = typer.Typer(
    context_settings={"help_option_names": ["-h", "--help"]},
    no_args_is_help=True,
    add_completion=False,
    help="Database commands",
)
|
15
|
+
|
16
|
+
|
17
|
+
@db.command("init", help="Connect to or create a database.")
def db_init(
    path: Annotated[
        str, typer.Option("--path", "-p", help="The path to the database file.")
    ] = str(janus_dir / "chroma.db"),
    url: Annotated[
        str,
        typer.Option(
            "--url",
            "-u",
            help="The URL of the database if the database is running externally.",
        ),
    ] = "",
) -> None:
    """Record the Chroma DB location and open a connection to it.

    When ``url`` is non-empty it is used as the location; otherwise ``path`` is
    made absolute and used instead. The chosen location is written to
    ``db_file``, stored in this module's ``db_loc`` global, and passed to
    ``ChromaEmbeddingDatabase``, whose instance is kept in the module-level
    ``embedding_db``.

    Arguments:
        path: Filesystem path of the database; ignored when ``url`` is given.
        url: URL of an externally running database.
    """
    import os

    from rich import print

    from janus.cli.constants import db_file
    from janus.embedding.database import ChromaEmbeddingDatabase

    global db_loc, embedding_db

    if url == "":
        location = os.path.abspath(path)
        print(f"Setting up Chroma DB at {location}")
    else:
        location = url
        print(f"Pointing to Chroma DB at {location}")

    # Persist the selection so that it can be read back from db_file later.
    with open(db_file, "w") as handle:
        handle.write(location)

    db_loc = location
    embedding_db = ChromaEmbeddingDatabase(db_loc)
|
52
|
+
|
53
|
+
|
54
|
+
@db.command("status", help="Print current database location.")
def db_status():
    """Print the database location currently held in this module's ``db_loc``."""
    from rich import print as rich_print

    rich_print(f"Chroma DB currently pointing to {db_loc}")
|
59
|
+
|
60
|
+
|
61
|
+
@db.command(
    "ls",
    help="List the current database's collections. Or supply a collection name to list "
    "information about its contents.",
)
def db_ls(
    collection_name: Annotated[
        Optional[str], typer.Argument(help="The name of the collection.")
    ] = None,
    peek: Annotated[
        Optional[int],
        typer.Option("--peek", "-p", help="Peek at N entries for a specific collection."),
    ] = None,
) -> None:
    """List the current database's collections

    Arguments:
        collection_name: Name passed to ``Collections.get``; ``None`` is passed
            through unchanged when no name is supplied.
        peek: Number of entries to peek at. Only accepted together with
            ``collection_name``; a value of 0 prints no entries.
    """
    from rich import print

    from janus.embedding.database import ChromaEmbeddingDatabase

    if peek is not None and collection_name is None:
        print(
            "\n[bold red]Cannot peek at all collections. Please specify a "
            "collection by name.[/bold red]"
        )
        return

    database = ChromaEmbeddingDatabase(db_loc)
    from janus.embedding.collections import Collections

    collections = Collections(database)
    for collection in collections.get(collection_name):
        print(
            f"\n[bold underline]Collection[/bold underline]: "
            f"[bold salmon1]{collection.name}[/bold salmon1]"
        )
        print(f"  ID: {collection.id}")
        print(f"  Metadata: {collection.metadata}")
        print(f"  Tenant: [green]{collection.tenant}[/green]")
        print(f"  Database: [green]{collection.database}[/green]")
        print(f"  Length: {collection.count()}")
        if peek:
            entry = collection.peek(peek)
            # Abbreviate the embeddings to the first two values of the first
            # vector. An empty collection has no first vector, and the vector
            # may not be a plain list, so guard the lookup and convert before
            # appending the ellipsis marker.
            embeddings = entry["embeddings"]
            if embeddings is not None and len(embeddings) > 0:
                entry["embeddings"] = list(embeddings[0][:2]) + ["..."]
            if peek == 1:
                print("  [bold]Peeking at first entry[/bold]:")
            else:
                print(f"  [bold]Peeking at first {peek} entries[/bold]:")
            print(entry)
        print()
|
110
|
+
|
111
|
+
|
112
|
+
@db.command("add", help="Add a collection to the current database.")
def db_add(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    model_name: Annotated[str, typer.Argument(help="The name of the embedding model.")],
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be added.",
        ),
    ] = "./",
    input_lang: Annotated[
        str, typer.Option("--language", "-l", help="The language of the source code.")
    ] = "python",
    max_tokens: Annotated[
        int,
        typer.Option(
            "--max-tokens",
            "-m",
            help="The maximum number of tokens for each chunk of input source code.",
        ),
    ] = 4096,
) -> None:
    """Add a collection to the database

    Every file under ``input_dir`` whose suffix is listed for ``input_lang`` in
    ``LANGUAGES`` is split and added to the collection, a summary is printed,
    and the vectorizer's config is written to ``collections_config_file``.

    Arguments:
        collection_name: The name of the collection to add
        model_name: The name of the embedding model to use
        input_dir: The directory containing the source code to be added
        input_lang: The language of the source code
        max_tokens: The maximum number of tokens for each chunk of input source code
    """
    # TODO: import factory
    import json
    from pathlib import Path

    from rich.console import Console

    from janus.cli.constants import collections_config_file, get_collections_config
    from janus.embedding.vectorize import ChromaDBVectorizer
    from janus.language.binary import BinarySplitter
    from janus.language.mumps import MumpsSplitter
    from janus.language.naive.registry import CUSTOM_SPLITTERS
    from janus.language.treesitter import TreeSitterSplitter
    from janus.utils.enums import LANGUAGES

    console = Console()

    # Must run before the collection is (re)created below so that the summary
    # can tell "created" from "added to".
    added_to = _check_collection(collection_name, input_dir)
    collections_config = get_collections_config()

    with console.status(
        f"Adding collection: [bold salmon1]{collection_name}[/bold salmon1]",
        spinner="arrow3",
    ):
        vectorizer = ChromaDBVectorizer().create_vectorizer(
            path=db_loc, config=collections_config
        )
        vectorizer.get_or_create_collection(collection_name, model_name=model_name)
        input_dir = Path(input_dir)
        suffixes = [f".{ext}" for ext in LANGUAGES[input_lang]["suffixes"]]
        # rglob() already searches recursively, so no "**/" prefix is needed.
        input_paths = [
            file for suffix in suffixes for file in input_dir.rglob(f"*{suffix}")
        ]

        splitter = None
        if input_lang in CUSTOM_SPLITTERS:
            if input_lang == "mumps":
                splitter = MumpsSplitter(
                    max_tokens=max_tokens,
                )
            elif input_lang == "binary":
                splitter = BinarySplitter(
                    max_tokens=max_tokens,
                )
        if splitter is None:
            # Also covers a registered name that has no dedicated splitter
            # here, which previously left `splitter` unbound.
            splitter = TreeSitterSplitter(
                language=input_lang,
                max_tokens=max_tokens,
            )

        for input_path in input_paths:
            input_block = splitter.split(input_path)
            vectorizer.add_nodes_recursively(
                input_block,
                collection_name,
                input_path.name,
            )

    total_files = sum(1 for path in input_dir.rglob("*") if not path.is_dir())

    # Printed through the Rich console so the markup is rendered rather than
    # emitted literally by the builtin print().
    if added_to:
        console.print(
            f"\nAdded to [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f"  Embedding Model: [green]{model_name}[/green]\n"
            f"  Input Directory: {input_dir.absolute()}\n"
            f"  {input_lang} [green]{suffixes}[/green] Files: "
            f"{len(input_paths)}\n"
            "  Other Files (skipped): "
            f"{total_files - len(input_paths)}\n"
        )
    else:
        console.print(
            f"\nCreated [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f"  Embedding Model: '{model_name}'\n"
            f"  Input Directory: {input_dir.absolute()}\n"
            f"  {input_lang} [green]{suffixes}[/green] Files: "
            f"{len(input_paths)}\n"
            "  Other Files (skipped): "
            f"{total_files - len(input_paths)}\n"
        )
    with open(collections_config_file, "w") as f:
        json.dump(vectorizer.config, f, indent=2)
|
223
|
+
|
224
|
+
|
225
|
+
@db.command(
    "rm",
    help="Remove a collection from the database.",
)
def db_rm(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    confirm: Annotated[
        bool,
        typer.Option(
            "--yes",
            "-y",
            help="Confirm the removal of the collection.",
        ),
    ] = False,
) -> None:
    """Remove a collection from the database

    Arguments:
        collection_name: The name of the collection to remove
        confirm: Skip the interactive confirmation prompt. Defaults to False so
            that omitting ``--yes`` leads to the prompt instead of a
            missing-option error.

    Raises:
        typer.Abort: If the user declines the confirmation prompt.
    """
    from rich import print
    from rich.prompt import Confirm

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    if not confirm:
        delete = Confirm.ask(
            f"\nAre you sure you want to [bold red]remove[/bold red] "
            f"[bold salmon1]{collection_name}[/bold salmon1]?",
        )
    else:
        delete = True
    if not delete:
        raise typer.Abort()

    database = ChromaEmbeddingDatabase(db_loc)
    collections = Collections(database)
    collections.delete(collection_name)
    # rich.print renders the markup; the builtin print would show it verbatim.
    print(
        f"[bold red]Removed[/bold red] collection "
        f"[bold salmon1]{collection_name}[/bold salmon1]"
    )
|
266
|
+
|
267
|
+
|
268
|
+
def _check_collection(collection_name: str, input_dir: str | Path) -> bool:
    """Report whether ``collection_name`` can be fetched from the current database.

    Arguments:
        collection_name: Name handed to ``Collections.get``.
        input_dir: Currently unused.

    Returns:
        True when the lookup succeeds, False when it raises
        ``InvalidCollectionException``.
    """
    from chromadb.errors import InvalidCollectionException

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    database = ChromaEmbeddingDatabase(db_loc)
    collections = Collections(database)
    try:
        collections.get(collection_name)
    except InvalidCollectionException:
        return False
    # NOTE: an interactive "update the existing collection?" confirmation
    # (aborting on refusal) used to be sketched here but is disabled.
    return True
|
janus/cli/diagram.py
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
|
12
|
+
|
13
|
+
def diagram(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    diagram_type: Annotated[
        str,
        typer.Option(
            "--diagram-type", "-dg", help="Diagram type to generate in PLANTUML"
        ),
    ] = "Activity",
    add_documentation: Annotated[
        bool,
        typer.Option(
            "--add-documentation/--no-documentation",
            "-ad",
            help="Whether to use documentation in generation",
        ),
    ] = False,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["CodeFormatRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
    extract_variables: Annotated[
        bool,
        typer.Option(
            "-ev",
            "--extract-variables",
            help="Present when diagram generator should "
            "extract variables before producing diagram",
        ),
    ] = False,
    use_janus_inputs: Annotated[
        bool,
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present when diagram generator should be "
            "using janus files as inputs",
        ),
    ] = False,
):
    # CLI entry point: builds a DiagramGenerator from the options above and
    # runs its translate() over input_dir, writing to output_dir.
    #
    # Documented with comments rather than a docstring on purpose: Typer falls
    # back to the docstring for command help when none is given at
    # registration, and the registration is not visible from this module.
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.diagram import DiagramGenerator

    # Map refiner names chosen on the command line to their REFINERS entries.
    # The list is rebound, not mutated, so the shared default is left intact.
    refiner_types = [REFINERS[r] for r in refiner_types]
    model_arguments = dict(temperature=temperature)
    collections_config = get_collections_config()
    diagram_generator = DiagramGenerator(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
        retriever_type=retriever_type,
        diagram_type=diagram_type,
        add_documentation=add_documentation,
        extract_variables=extract_variables,
        use_janus_inputs=use_janus_inputs,
    )
    diagram_generator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|
166
|
+
|
167
|
+
|
168
|
+
def render(
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
        ),
    ],
    output_dir: Annotated[str, typer.Option("--output", "-o")],
):
    # For every *.json file under input_dir, walk its nested "outputs" lists
    # and run the PlantUML jar on each string leaf. Each leaf is written to a
    # temporary "<stem>_<index>.txt" file mirroring the input's relative path
    # under output_dir; that file is removed once the jar has been invoked.
    #
    # Documented with comments rather than a docstring on purpose: Typer falls
    # back to the docstring for command help when none is given at
    # registration, and the registration is not visible from this module.
    import json
    import subprocess  # nosec

    from janus.cli.constants import homedir

    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    jar_path = homedir / ".janus/lib/plantuml.jar"

    def _render(obj, output_file, ind=0):
        # Returns the next unused index, so the caller continues numbering
        # where the nested call stopped.
        for o in obj["outputs"]:
            if isinstance(o, dict):
                # Assign, don't add: the return value is already absolute.
                ind = _render(o, output_file, ind)
            else:
                outfile_new = output_file.with_stem(f"{output_file.stem}_{ind}")
                text = o.replace("\\n", "\n").strip()
                outfile_new.write_text(text)
                try:
                    subprocess.run(["java", "-jar", jar_path, outfile_new])  # nosec
                finally:
                    # Remove the temporary source even if java cannot be launched.
                    outfile_new.unlink()
                ind += 1
        return ind

    for input_file in input_dir.rglob("*.json"):
        with open(input_file, "r") as f:
            data = json.load(f)

        output_file = output_dir / input_file.relative_to(input_dir).with_suffix(".txt")
        # Inputs may sit several levels deep and output_dir may not exist yet.
        output_file.parent.mkdir(parents=True, exist_ok=True)

        _render(data, output_file)
|
janus/cli/document.py
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
|
12
|
+
|
13
|
+
def document(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during documentation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    doc_mode: Annotated[
        str,
        typer.Option(
            "--doc-mode",
            "-d",
            help="The documentation mode.",
            click_type=click.Choice(["cloze", "summary", "multidoc", "requirements"]),
        ),
    ] = "cloze",
    comments_per_request: Annotated[
        Optional[int],
        typer.Option(
            "--comments-per-request",
            "-rc",
            help="The maximum number of comments to generate per request when using "
            "Cloze documentation mode.",
        ),
    ] = None,
    drop_comments: Annotated[
        bool,
        typer.Option(
            "--drop-comments/--keep-comments",
            help="Whether to drop or keep comments in the code sent to the LLM",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
    use_janus_inputs: Annotated[
        bool,
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if converter should use janus files as inputs",
        ),
    ] = False,
):
    # CLI entry point: picks a documenter class from doc_mode, builds it from
    # the options above, and runs its translate() over input_dir.
    #
    # Documented with comments rather than a docstring on purpose: Typer falls
    # back to the docstring for command help when none is given at
    # registration, and the registration is not visible from this module.
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
    from janus.converter.requirements import RequirementsDocumenter

    # Map refiner names chosen on the command line to their REFINERS entries.
    # The list is rebound, not mutated, so the shared default is left intact.
    refiner_types = [REFINERS[r] for r in refiner_types]
    model_arguments = dict(temperature=temperature)
    collections_config = get_collections_config()
    kwargs = dict(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
        retriever_type=retriever_type,
        use_janus_inputs=use_janus_inputs,
    )
    if doc_mode == "cloze":
        documenter = ClozeDocumenter(comments_per_request=comments_per_request, **kwargs)
    elif doc_mode == "multidoc":
        documenter = MultiDocumenter(drop_comments=drop_comments, **kwargs)
    elif doc_mode == "requirements":
        documenter = RequirementsDocumenter(drop_comments=drop_comments, **kwargs)
    else:
        # Remaining choice is "summary".
        documenter = Documenter(drop_comments=drop_comments, **kwargs)

    documenter.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|