janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. janus/__init__.py +1 -1
  2. janus/__main__.py +1 -1
  3. janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
  4. janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
  5. janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
  6. janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
  7. janus/_tests/test_cli.py +3 -2
  8. janus/cli/aggregate.py +135 -0
  9. janus/cli/cli.py +111 -0
  10. janus/cli/constants.py +43 -0
  11. janus/cli/database.py +289 -0
  12. janus/cli/diagram.py +178 -0
  13. janus/cli/document.py +174 -0
  14. janus/cli/embedding.py +122 -0
  15. janus/cli/llm.py +187 -0
  16. janus/cli/partition.py +125 -0
  17. janus/cli/self_eval.py +149 -0
  18. janus/cli/translate.py +183 -0
  19. janus/converter/__init__.py +1 -1
  20. janus/converter/_tests/test_translate.py +2 -0
  21. janus/converter/converter.py +129 -92
  22. janus/converter/document.py +21 -14
  23. janus/converter/evaluate.py +237 -4
  24. janus/converter/translate.py +3 -3
  25. janus/embedding/collections.py +1 -1
  26. janus/language/alc/_tests/alc.asm +3779 -0
  27. janus/language/alc/_tests/test_alc.py +1 -1
  28. janus/language/alc/alc.py +9 -4
  29. janus/language/binary/_tests/hello.bin +0 -0
  30. janus/language/block.py +47 -12
  31. janus/language/file.py +1 -1
  32. janus/language/mumps/_tests/mumps.m +235 -0
  33. janus/language/splitter.py +31 -23
  34. janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
  35. janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
  36. janus/language/treesitter/_tests/languages/matlab.m +225 -0
  37. janus/language/treesitter/treesitter.py +9 -1
  38. janus/llm/models_info.py +26 -13
  39. janus/metrics/_tests/asm_test_file.asm +10 -0
  40. janus/metrics/_tests/mumps_test_file.m +6 -0
  41. janus/metrics/_tests/test_treesitter_metrics.py +1 -1
  42. janus/metrics/prompts/clarity.txt +8 -0
  43. janus/metrics/prompts/completeness.txt +16 -0
  44. janus/metrics/prompts/faithfulness.txt +10 -0
  45. janus/metrics/prompts/hallucination.txt +16 -0
  46. janus/metrics/prompts/quality.txt +8 -0
  47. janus/metrics/prompts/readability.txt +16 -0
  48. janus/metrics/prompts/usefulness.txt +16 -0
  49. janus/parsers/code_parser.py +4 -4
  50. janus/parsers/doc_parser.py +12 -9
  51. janus/parsers/eval_parsers/incose_parser.py +134 -0
  52. janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
  53. janus/parsers/parser.py +7 -0
  54. janus/parsers/partition_parser.py +47 -13
  55. janus/parsers/reqs_parser.py +8 -5
  56. janus/parsers/uml.py +5 -4
  57. janus/prompts/prompt.py +2 -2
  58. janus/prompts/templates/README.md +30 -0
  59. janus/prompts/templates/basic_aggregation/human.txt +6 -0
  60. janus/prompts/templates/basic_aggregation/system.txt +1 -0
  61. janus/prompts/templates/basic_refinement/human.txt +14 -0
  62. janus/prompts/templates/basic_refinement/system.txt +1 -0
  63. janus/prompts/templates/diagram/human.txt +9 -0
  64. janus/prompts/templates/diagram/system.txt +1 -0
  65. janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
  66. janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
  67. janus/prompts/templates/document/human.txt +10 -0
  68. janus/prompts/templates/document/system.txt +1 -0
  69. janus/prompts/templates/document_cloze/human.txt +11 -0
  70. janus/prompts/templates/document_cloze/system.txt +1 -0
  71. janus/prompts/templates/document_cloze/variables.json +4 -0
  72. janus/prompts/templates/document_cloze/variables_asm.json +4 -0
  73. janus/prompts/templates/document_inline/human.txt +13 -0
  74. janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
  75. janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
  76. janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
  77. janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
  78. janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
  79. janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
  80. janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
  81. janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
  82. janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
  83. janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
  84. janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
  85. janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
  86. janus/prompts/templates/multidocument/human.txt +15 -0
  87. janus/prompts/templates/multidocument/system.txt +1 -0
  88. janus/prompts/templates/partition/human.txt +22 -0
  89. janus/prompts/templates/partition/system.txt +1 -0
  90. janus/prompts/templates/partition/variables.json +4 -0
  91. janus/prompts/templates/pseudocode/human.txt +7 -0
  92. janus/prompts/templates/pseudocode/system.txt +7 -0
  93. janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
  94. janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
  95. janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
  96. janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
  97. janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
  98. janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
  99. janus/prompts/templates/refinement/hallucination/human.txt +13 -0
  100. janus/prompts/templates/refinement/hallucination/system.txt +1 -0
  101. janus/prompts/templates/refinement/reflection/human.txt +15 -0
  102. janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
  103. janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
  104. janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
  105. janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
  106. janus/prompts/templates/refinement/reflection/system.txt +1 -0
  107. janus/prompts/templates/refinement/revision/human.txt +16 -0
  108. janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
  109. janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
  110. janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
  111. janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
  112. janus/prompts/templates/refinement/revision/system.txt +1 -0
  113. janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
  114. janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
  115. janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
  116. janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
  117. janus/prompts/templates/requirements/human.txt +13 -0
  118. janus/prompts/templates/requirements/system.txt +2 -0
  119. janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
  120. janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
  121. janus/prompts/templates/simple/human.txt +16 -0
  122. janus/prompts/templates/simple/system.txt +3 -0
  123. janus/refiners/format.py +49 -0
  124. janus/refiners/refiner.py +143 -4
  125. janus/utils/enums.py +140 -111
  126. janus/utils/logger.py +2 -0
  127. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/METADATA +7 -7
  128. janus_llm-4.3.5.dist-info/RECORD +210 -0
  129. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/WHEEL +1 -1
  130. janus_llm-4.3.5.dist-info/entry_points.txt +3 -0
  131. janus/cli.py +0 -1343
  132. janus_llm-4.2.0.dist-info/RECORD +0 -113
  133. janus_llm-4.2.0.dist-info/entry_points.txt +0 -3
  134. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/LICENSE +0 -0
janus/cli/database.py ADDED
@@ -0,0 +1,289 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import typer
5
+ from typing_extensions import Annotated
6
+
7
+ from janus.cli.constants import db_loc, janus_dir
8
+
9
# Typer sub-application grouping the database management commands defined
# in this module; invoking it with no arguments shows the help screen.
db = typer.Typer(
    no_args_is_help=True,
    add_completion=False,
    context_settings={"help_option_names": ["-h", "--help"]},
    help="Database commands",
)
15
+
16
+
17
@db.command("init", help="Connect to or create a database.")
def db_init(
    path: Annotated[
        str, typer.Option("--path", "-p", help="The path to the database file.")
    ] = str(janus_dir / "chroma.db"),
    url: Annotated[
        str,
        typer.Option(
            "--url",
            "-u",
            help="The URL of the database if the database is running externally.",
        ),
    ] = "",
) -> None:
    """Select the Chroma database location and persist that choice.

    Arguments:
        path: Filesystem location of the database; made absolute before use.
            Only consulted when `url` is empty.
        url: Location of an externally running database. Takes precedence
            over `path` when non-empty.
    """
    global db_loc, embedding_db

    import os

    from rich import print

    from janus.cli.constants import db_file
    from janus.embedding.database import ChromaEmbeddingDatabase

    if url != "":
        location = url
        print(f"Pointing to Chroma DB at {location}")
    else:
        location = os.path.abspath(path)
        print(f"Setting up Chroma DB at {location}")

    # Record the chosen location in db_file, then rebind this module's globals.
    with open(db_file, "w") as handle:
        handle.write(location)

    db_loc = location
    embedding_db = ChromaEmbeddingDatabase(db_loc)
52
+
53
+
54
@db.command("status", help="Print current database location.")
def db_status():
    """Print the database location currently held in `db_loc`."""
    from rich import print as rich_print

    message = f"Chroma DB currently pointing to {db_loc}"
    rich_print(message)
59
+
60
+
61
@db.command(
    "ls",
    help="List the current database's collections. Or supply a collection name to list "
    "information about its contents.",
)
def db_ls(
    collection_name: Annotated[
        Optional[str], typer.Argument(help="The name of the collection.")
    ] = None,
    peek: Annotated[
        Optional[int],
        typer.Option("--peek", "-p", help="Peek at N entries for a specific collection."),
    ] = None,
) -> None:
    """List the current database's collections

    Arguments:
        collection_name: The collection to describe; passed straight to
            `Collections.get`, so `None` is forwarded when it is omitted.
        peek: Number of entries to preview. Only allowed together with
            `collection_name`.
    """
    from rich import print

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    if peek is not None and collection_name is None:
        print(
            "\n[bold red]Cannot peek at all collections. Please specify a "
            "collection by name.[/bold red]"
        )
        return

    collections = Collections(ChromaEmbeddingDatabase(db_loc))
    for collection in collections.get(collection_name):
        print(
            f"\n[bold underline]Collection[/bold underline]: "
            f"[bold salmon1]{collection.name}[/bold salmon1]"
        )
        print(f" ID: {collection.id}")
        print(f" Metadata: {collection.metadata}")
        print(f" Tenant: [green]{collection.tenant}[/green]")
        print(f" Database: [green]{collection.database}[/green]")
        print(f" Length: {collection.count()}")
        if peek:
            entry = collection.peek(peek)
            embeddings = entry["embeddings"]
            # Abbreviate the embeddings to the first two values of the first
            # vector. Guard against a peek that returned no embeddings (for
            # example an empty collection) and use list() so the
            # concatenation also works when the vector is not a plain list.
            if embeddings is not None and len(embeddings) > 0:
                entry["embeddings"] = list(embeddings[0][:2]) + ["..."]
            if peek == 1:
                print(" [bold]Peeking at first entry[/bold]:")
            else:
                print(f" [bold]Peeking at first {peek} entries[/bold]:")
            print(entry)
        print()
110
+
111
+
112
@db.command("add", help="Add a collection to the current database.")
def db_add(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    model_name: Annotated[str, typer.Argument(help="The name of the embedding model.")],
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be added.",
        ),
    ] = "./",
    input_lang: Annotated[
        str, typer.Option("--language", "-l", help="The language of the source code.")
    ] = "python",
    max_tokens: Annotated[
        int,
        typer.Option(
            "--max-tokens",
            "-m",
            help="The maximum number of tokens for each chunk of input source code.",
        ),
    ] = 4096,
) -> None:
    """Add a collection to the database

    Arguments:
        collection_name: The name of the collection to add
        model_name: The name of the embedding model to use
        input_dir: The directory containing the source code to be added
        input_lang: The language of the source code
        max_tokens: The maximum number of tokens for each chunk of input source code

    Raises:
        typer.BadParameter: If `input_lang` is not a key of `LANGUAGES`, or is a
            `CUSTOM_SPLITTERS` entry this command has no splitter for.
    """
    # TODO: import factory
    import json
    from pathlib import Path

    from rich.console import Console

    from janus.cli.constants import collections_config_file, get_collections_config
    from janus.embedding.vectorize import ChromaDBVectorizer
    from janus.language.binary import BinarySplitter
    from janus.language.mumps import MumpsSplitter
    from janus.language.naive.registry import CUSTOM_SPLITTERS
    from janus.language.treesitter import TreeSitterSplitter
    from janus.utils.enums import LANGUAGES

    console = Console()

    # Validate the language before touching the database, so a bad value
    # cannot leave a freshly created but empty collection behind.
    if input_lang not in LANGUAGES:
        raise typer.BadParameter(
            f"Unknown language: {input_lang!r}", param_hint="--language"
        )
    custom_splitter_classes = {"mumps": MumpsSplitter, "binary": BinarySplitter}
    if input_lang in CUSTOM_SPLITTERS and input_lang not in custom_splitter_classes:
        # Previously this case left `splitter` unbound and crashed in the loop.
        raise typer.BadParameter(
            f"No splitter available for language: {input_lang!r}",
            param_hint="--language",
        )

    added_to = _check_collection(collection_name, input_dir)
    collections_config = get_collections_config()

    with console.status(
        f"Adding collection: [bold salmon]{collection_name}[/bold salmon]",
        spinner="arrow3",
    ):
        vectorizer_factory = ChromaDBVectorizer()
        vectorizer = vectorizer_factory.create_vectorizer(
            path=db_loc, config=collections_config
        )
        vectorizer.get_or_create_collection(collection_name, model_name=model_name)
        input_dir = Path(input_dir)
        suffixes = [f".{ext}" for ext in LANGUAGES[input_lang]["suffixes"]]
        # rglob already searches recursively, so no "**/" prefix is needed.
        input_paths = [file for ext in suffixes for file in input_dir.rglob(f"*{ext}")]

        if input_lang in CUSTOM_SPLITTERS:
            splitter = custom_splitter_classes[input_lang](max_tokens=max_tokens)
        else:
            splitter = TreeSitterSplitter(
                language=input_lang,
                max_tokens=max_tokens,
            )
        for input_path in input_paths:
            input_block = splitter.split(input_path)
            vectorizer.add_nodes_recursively(
                input_block,
                collection_name,
                input_path.name,
            )

    total_files = sum(1 for path in input_dir.rglob("*") if not path.is_dir())
    if added_to:
        heading = (
            f"\nAdded to [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f" Embedding Model: [green]{model_name}[/green]\n"
        )
    else:
        heading = (
            f"\nCreated [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f" Embedding Model: '{model_name}'\n"
        )
    # Use the Rich console: the builtin print would emit the markup tags verbatim.
    console.print(
        heading + f" Input Directory: {input_dir.absolute()}\n"
        f" {input_lang} [green]{suffixes}[/green] Files: "
        f"{len(input_paths)}\n"
        " Other Files (skipped): "
        f"{total_files - len(input_paths)}\n"
    )
    with open(collections_config_file, "w") as f:
        json.dump(vectorizer.config, f, indent=2)
223
+
224
+
225
@db.command(
    "rm",
    help="Remove a collection from the database.",
)
def db_rm(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    confirm: Annotated[
        bool,
        typer.Option(
            "--yes",
            "-y",
            help="Confirm the removal of the collection.",
        ),
    ] = False,
) -> None:
    """Remove a collection from the database

    Arguments:
        collection_name: The name of the collection to remove
        confirm: Skip the interactive confirmation prompt. Defaults to False;
            without a default Typer treats `--yes` as a required option, which
            made the prompt below unreachable.

    Raises:
        typer.Abort: If the user declines the confirmation prompt.
    """
    # Rich's print is needed so the markup below is rendered, not printed verbatim.
    from rich import print
    from rich.prompt import Confirm

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    if not confirm:
        delete = Confirm.ask(
            f"\nAre you sure you want to [bold red]remove[/bold red] "
            f"[bold salmon1]{collection_name}[/bold salmon1]?",
        )
        if not delete:
            raise typer.Abort()

    collections = Collections(ChromaEmbeddingDatabase(db_loc))
    collections.delete(collection_name)
    print(
        f"[bold red]Removed[/bold red] collection "
        f"[bold salmon1]{collection_name}[/bold salmon1]"
    )
266
+
267
+
268
def _check_collection(collection_name: str, input_dir: str | Path) -> bool:
    """Report whether `collection_name` can be fetched from the current database.

    Arguments:
        collection_name: The name of the collection to look up
        input_dir: Not used by the lookup itself

    Returns:
        True when the lookup succeeds, False when it raises
        `InvalidCollectionException`.
    """
    from chromadb.errors import InvalidCollectionException

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    store = Collections(ChromaEmbeddingDatabase(db_loc))
    try:
        store.get(collection_name)
    except InvalidCollectionException:
        return False
    return True
janus/cli/diagram.py ADDED
@@ -0,0 +1,178 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import click
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from janus.cli.constants import REFINERS
9
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
10
+ from janus.utils.enums import LANGUAGES
11
+
12
+
13
def diagram(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    diagram_type: Annotated[
        str,
        typer.Option(
            "--diagram-type", "-dg", help="Diagram type to generate in PLANTUML"
        ),
    ] = "Activity",
    add_documentation: Annotated[
        bool,
        typer.Option(
            "--add-documentation/--no-documentation",
            "-ad",
            help="Whether to use documentation in generation",
        ),
    ] = False,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Adjacent literals instead of a backslash continuation, which
            # embedded the source indentation in the help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
):
    # Builds a DiagramGenerator from the command-line options and runs its
    # translate() over input_dir, writing to output_dir.
    # NOTE(review): documented with comments rather than a docstring because
    # Typer can surface a docstring as help text when a command is registered
    # without help=; confirm against the registration site before converting.
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.diagram import DiagramGenerator

    # Replace the refiner names chosen on the command line with their
    # REFINERS entries.
    refiner_types = [REFINERS[r] for r in refiner_types]
    model_arguments = dict(temperature=temperature)
    collections_config = get_collections_config()
    diagram_generator = DiagramGenerator(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
        retriever_type=retriever_type,
        diagram_type=diagram_type,
        add_documentation=add_documentation,
    )
    diagram_generator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
146
+
147
+
148
def render(
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
        ),
    ],
    output_dir: Annotated[str, typer.Option("--output", "-o")],
):
    # For every *.json file under input_dir: write its "output" field to a
    # mirrored .txt path under output_dir, run the PlantUML jar on that file,
    # then delete the intermediate .txt.
    # NOTE(review): documented with comments rather than a docstring because
    # Typer can surface a docstring as help text when a command is registered
    # without help=; confirm against the registration site before converting.
    import json
    import subprocess  # nosec

    from janus.cli.constants import homedir

    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    jar_path = homedir / ".janus/lib/plantuml.jar"
    for input_file in input_dir.rglob("*.json"):
        with open(input_file, "r") as f:
            data = json.load(f)

        output_file = output_dir / input_file.relative_to(input_dir).with_suffix(".txt")
        # rglob descends into subdirectories, so the mirrored parent may be
        # several levels deep (or output_dir itself may not exist yet); a
        # plain mkdir() fails in both cases.
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # Turn literal backslash-n sequences in the stored output into newlines.
        text = data["output"].replace("\\n", "\n").strip()
        output_file.write_text(text)

        subprocess.run(["java", "-jar", jar_path, output_file])  # nosec
        output_file.unlink()
janus/cli/document.py ADDED
@@ -0,0 +1,174 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import click
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from janus.cli.constants import REFINERS
9
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
10
+ from janus.utils.enums import LANGUAGES
11
+
12
+
13
def document(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during documentation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    doc_mode: Annotated[
        str,
        typer.Option(
            "--doc-mode",
            "-d",
            help="The documentation mode.",
            click_type=click.Choice(["cloze", "summary", "multidoc", "requirements"]),
        ),
    ] = "cloze",
    comments_per_request: Annotated[
        Optional[int],
        typer.Option(
            "--comments-per-request",
            "-rc",
            help="The maximum number of comments to generate per request when using "
            "Cloze documentation mode.",
        ),
    ] = None,
    drop_comments: Annotated[
        bool,
        typer.Option(
            "--drop-comments/--keep-comments",
            help="Whether to drop or keep comments in the code sent to the LLM",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Adjacent literals instead of a backslash continuation, which
            # embedded the source indentation in the help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
):
    # Builds the documenter selected by doc_mode from the command-line options
    # and runs its translate() over input_dir, writing to output_dir.
    # NOTE(review): documented with comments rather than a docstring because
    # Typer can surface a docstring as help text when a command is registered
    # without help=; confirm against the registration site before converting.
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
    from janus.converter.requirements import RequirementsDocumenter

    # Replace the refiner names chosen on the command line with their
    # REFINERS entries.
    refiner_types = [REFINERS[r] for r in refiner_types]
    model_arguments = dict(temperature=temperature)
    collections_config = get_collections_config()
    kwargs = dict(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiner_types,
        retriever_type=retriever_type,
    )
    # Only the cloze documenter takes comments_per_request; every other mode
    # takes drop_comments. "summary" falls through to the base Documenter.
    if doc_mode == "cloze":
        documenter = ClozeDocumenter(comments_per_request=comments_per_request, **kwargs)
    elif doc_mode == "multidoc":
        documenter = MultiDocumenter(drop_comments=drop_comments, **kwargs)
    elif doc_mode == "requirements":
        documenter = RequirementsDocumenter(drop_comments=drop_comments, **kwargs)
    else:
        documenter = Documenter(drop_comments=drop_comments, **kwargs)

    documenter.translate(input_dir, output_dir, failure_dir, overwrite, collection)