janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. janus/__init__.py +1 -1
  2. janus/__main__.py +1 -1
  3. janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
  4. janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
  5. janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
  6. janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
  7. janus/_tests/test_cli.py +3 -2
  8. janus/cli/aggregate.py +135 -0
  9. janus/cli/cli.py +117 -0
  10. janus/cli/constants.py +49 -0
  11. janus/cli/database.py +289 -0
  12. janus/cli/diagram.py +207 -0
  13. janus/cli/document.py +183 -0
  14. janus/cli/embedding.py +122 -0
  15. janus/cli/llm.py +191 -0
  16. janus/cli/partition.py +134 -0
  17. janus/cli/pipeline.py +123 -0
  18. janus/cli/self_eval.py +147 -0
  19. janus/cli/translate.py +192 -0
  20. janus/converter/__init__.py +1 -1
  21. janus/converter/_tests/test_translate.py +7 -5
  22. janus/converter/chain.py +180 -0
  23. janus/converter/converter.py +444 -153
  24. janus/converter/diagram.py +8 -6
  25. janus/converter/document.py +27 -16
  26. janus/converter/evaluate.py +143 -144
  27. janus/converter/partition.py +2 -10
  28. janus/converter/requirements.py +4 -40
  29. janus/converter/translate.py +3 -59
  30. janus/embedding/collections.py +1 -1
  31. janus/language/alc/_tests/alc.asm +3779 -0
  32. janus/language/binary/_tests/hello.bin +0 -0
  33. janus/language/block.py +78 -14
  34. janus/language/file.py +1 -1
  35. janus/language/mumps/_tests/mumps.m +235 -0
  36. janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
  37. janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
  38. janus/language/treesitter/_tests/languages/matlab.m +225 -0
  39. janus/llm/models_info.py +9 -1
  40. janus/metrics/_tests/asm_test_file.asm +10 -0
  41. janus/metrics/_tests/mumps_test_file.m +6 -0
  42. janus/metrics/_tests/test_treesitter_metrics.py +1 -1
  43. janus/metrics/metric.py +47 -124
  44. janus/metrics/prompts/clarity.txt +8 -0
  45. janus/metrics/prompts/completeness.txt +16 -0
  46. janus/metrics/prompts/faithfulness.txt +10 -0
  47. janus/metrics/prompts/hallucination.txt +16 -0
  48. janus/metrics/prompts/quality.txt +8 -0
  49. janus/metrics/prompts/readability.txt +16 -0
  50. janus/metrics/prompts/usefulness.txt +16 -0
  51. janus/parsers/code_parser.py +4 -4
  52. janus/parsers/doc_parser.py +12 -9
  53. janus/parsers/parser.py +7 -0
  54. janus/parsers/partition_parser.py +6 -4
  55. janus/parsers/reqs_parser.py +11 -8
  56. janus/parsers/uml.py +5 -4
  57. janus/prompts/prompt.py +2 -2
  58. janus/prompts/templates/README.md +30 -0
  59. janus/prompts/templates/basic_aggregation/human.txt +6 -0
  60. janus/prompts/templates/basic_aggregation/system.txt +1 -0
  61. janus/prompts/templates/basic_refinement/human.txt +14 -0
  62. janus/prompts/templates/basic_refinement/system.txt +1 -0
  63. janus/prompts/templates/diagram/human.txt +9 -0
  64. janus/prompts/templates/diagram/system.txt +1 -0
  65. janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
  66. janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
  67. janus/prompts/templates/document/human.txt +10 -0
  68. janus/prompts/templates/document/system.txt +1 -0
  69. janus/prompts/templates/document_cloze/human.txt +11 -0
  70. janus/prompts/templates/document_cloze/system.txt +1 -0
  71. janus/prompts/templates/document_cloze/variables.json +4 -0
  72. janus/prompts/templates/document_cloze/variables_asm.json +4 -0
  73. janus/prompts/templates/document_inline/human.txt +13 -0
  74. janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
  75. janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
  76. janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
  77. janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
  78. janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
  79. janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
  80. janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
  81. janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
  82. janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
  83. janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
  84. janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
  85. janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
  86. janus/prompts/templates/multidocument/human.txt +15 -0
  87. janus/prompts/templates/multidocument/system.txt +1 -0
  88. janus/prompts/templates/partition/human.txt +22 -0
  89. janus/prompts/templates/partition/system.txt +1 -0
  90. janus/prompts/templates/partition/variables.json +4 -0
  91. janus/prompts/templates/pseudocode/human.txt +7 -0
  92. janus/prompts/templates/pseudocode/system.txt +7 -0
  93. janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
  94. janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
  95. janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
  96. janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
  97. janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
  98. janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
  99. janus/prompts/templates/refinement/hallucination/human.txt +13 -0
  100. janus/prompts/templates/refinement/hallucination/system.txt +1 -0
  101. janus/prompts/templates/refinement/reflection/human.txt +15 -0
  102. janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
  103. janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
  104. janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
  105. janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
  106. janus/prompts/templates/refinement/reflection/system.txt +1 -0
  107. janus/prompts/templates/refinement/revision/human.txt +16 -0
  108. janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
  109. janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
  110. janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
  111. janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
  112. janus/prompts/templates/refinement/revision/system.txt +1 -0
  113. janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
  114. janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
  115. janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
  116. janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
  117. janus/prompts/templates/requirements/human.txt +13 -0
  118. janus/prompts/templates/requirements/system.txt +2 -0
  119. janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
  120. janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
  121. janus/prompts/templates/simple/human.txt +16 -0
  122. janus/prompts/templates/simple/system.txt +3 -0
  123. janus/refiners/format.py +49 -0
  124. janus/refiners/refiner.py +113 -4
  125. janus/utils/enums.py +127 -112
  126. janus/utils/logger.py +2 -0
  127. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
  128. janus_llm-4.4.5.dist-info/RECORD +210 -0
  129. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
  130. janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
  131. janus/cli.py +0 -1488
  132. janus/metrics/_tests/test_llm.py +0 -90
  133. janus/metrics/llm_metrics.py +0 -202
  134. janus_llm-4.3.1.dist-info/RECORD +0 -115
  135. janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
  136. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/cli/database.py ADDED
@@ -0,0 +1,289 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import typer
5
+ from typing_extensions import Annotated
6
+
7
+ from janus.cli.constants import db_loc, janus_dir
8
+
9
# Typer sub-application that groups the database management commands
# registered on it with ``@db.command``.
db = typer.Typer(
    no_args_is_help=True,
    add_completion=False,
    help="Database commands",
    context_settings={"help_option_names": ["-h", "--help"]},
)
15
+
16
+
17
@db.command("init", help="Connect to or create a database.")
def db_init(
    path: Annotated[
        str, typer.Option("--path", "-p", help="The path to the database file.")
    ] = str(janus_dir / "chroma.db"),
    url: Annotated[
        str,
        typer.Option(
            "--url",
            "-u",
            help="The URL of the database if the database is running externally.",
        ),
    ] = "",
) -> None:
    """Point Janus at a Chroma database and connect to it

    The chosen location is written to the ``db_file`` config file and stored in
    the module-level ``db_loc``. A non-empty ``url`` takes precedence over
    ``path``.

    Arguments:
        path: Filesystem path of the database; made absolute before use
        url: URL of an externally running database; empty means "use path"
    """
    import os

    from rich import print

    from janus.cli.constants import db_file
    from janus.embedding.database import ChromaEmbeddingDatabase

    global db_loc, embedding_db

    if url == "":
        location = os.path.abspath(path)
        print(f"Setting up Chroma DB at {location}")
    else:
        location = url
        print(f"Pointing to Chroma DB at {url}")

    # Persist the location so it is available the next time the file is read.
    with open(db_file, "w") as handle:
        handle.write(location)

    db_loc = location
    embedding_db = ChromaEmbeddingDatabase(db_loc)
52
+
53
+
54
@db.command("status", help="Print current database location.")
def db_status():
    """Print the database location currently held in ``db_loc``"""
    from rich import print as rich_print

    rich_print(f"Chroma DB currently pointing to {db_loc}")
59
+
60
+
61
@db.command(
    "ls",
    help="List the current database's collections. Or supply a collection name to list "
    "information about its contents.",
)
def db_ls(
    collection_name: Annotated[
        Optional[str], typer.Argument(help="The name of the collection.")
    ] = None,
    peek: Annotated[
        Optional[int],
        typer.Option("--peek", "-p", help="Peek at N entries for a specific collection."),
    ] = None,
) -> None:
    """List the current database's collections

    Arguments:
        collection_name: Name of a single collection to describe; when omitted,
            whatever ``Collections.get(None)`` returns is listed
        peek: Number of entries to preview; only allowed together with
            ``collection_name``
    """
    from rich import print

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    if peek is not None and collection_name is None:
        print(
            "\n[bold red]Cannot peek at all collections. Please specify a "
            "collection by name.[/bold red]"
        )
        return

    # Named `database` so the module-level Typer app `db` is not shadowed.
    database = ChromaEmbeddingDatabase(db_loc)
    collections = Collections(database)
    for collection in collections.get(collection_name):
        print(
            f"\n[bold underline]Collection[/bold underline]: "
            f"[bold salmon1]{collection.name}[/bold salmon1]"
        )
        print(f" ID: {collection.id}")
        print(f" Metadata: {collection.metadata}")
        print(f" Tenant: [green]{collection.tenant}[/green]")
        print(f" Database: [green]{collection.database}[/green]")
        print(f" Length: {collection.count()}")
        if peek:
            entry = collection.peek(peek)
            embeddings = entry["embeddings"]
            # Abbreviate the embeddings to the first two components of the first
            # vector. An empty collection has no first vector, so leave the
            # entry untouched instead of raising IndexError. `list(...)` keeps
            # the concatenation valid when the vector is not a plain list
            # (e.g. an array type).
            if embeddings is not None and len(embeddings) > 0:
                entry["embeddings"] = list(embeddings[0][:2]) + ["..."]
            if peek == 1:
                print(" [bold]Peeking at first entry[/bold]:")
            else:
                print(f" [bold]Peeking at first {peek} entries[/bold]:")
            print(entry)
            print()
110
+
111
+
112
@db.command("add", help="Add a collection to the current database.")
def db_add(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    model_name: Annotated[str, typer.Argument(help="The name of the embedding model.")],
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be added.",
        ),
    ] = "./",
    input_lang: Annotated[
        str, typer.Option("--language", "-l", help="The language of the source code.")
    ] = "python",
    max_tokens: Annotated[
        int,
        typer.Option(
            "--max-tokens",
            "-m",
            help="The maximum number of tokens for each chunk of input source code.",
        ),
    ] = 4096,
) -> None:
    """Add a collection to the database

    Every file under ``input_dir`` whose suffix belongs to ``input_lang`` is
    split into blocks and added to the collection. A summary is printed and the
    vectorizer's config is written to the collections config file.

    Arguments:
        collection_name: The name of the collection to add
        model_name: The name of the embedding model to use
        input_dir: The directory containing the source code to be added
        input_lang: The language of the source code
        max_tokens: The maximum number of tokens for each chunk of input source code
    """
    # TODO: import factory
    import json

    from rich.console import Console

    from janus.cli.constants import collections_config_file, get_collections_config
    from janus.embedding.vectorize import ChromaDBVectorizer
    from janus.language.binary import BinarySplitter
    from janus.language.mumps import MumpsSplitter
    from janus.language.treesitter import TreeSitterSplitter
    from janus.utils.enums import LANGUAGES

    console = Console()

    added_to = _check_collection(collection_name, input_dir)
    collections_config = get_collections_config()
    source_dir = Path(input_dir)

    with console.status(
        f"Adding collection: [bold salmon1]{collection_name}[/bold salmon1]",
        spinner="arrow3",
    ):
        vectorizer = ChromaDBVectorizer().create_vectorizer(
            path=db_loc, config=collections_config
        )
        vectorizer.get_or_create_collection(collection_name, model_name=model_name)
        suffixes = [f".{ext}" for ext in LANGUAGES[input_lang]["suffixes"]]
        # rglob already searches recursively, so no explicit "**/" is needed.
        input_paths = [
            file for ext in suffixes for file in source_dir.rglob(f"*{ext}")
        ]

        # Flat dispatch so `splitter` is bound on every path: the dedicated
        # splitters for mumps and binary, the tree-sitter splitter otherwise.
        if input_lang == "mumps":
            splitter = MumpsSplitter(max_tokens=max_tokens)
        elif input_lang == "binary":
            splitter = BinarySplitter(max_tokens=max_tokens)
        else:
            splitter = TreeSitterSplitter(language=input_lang, max_tokens=max_tokens)

        for input_path in input_paths:
            input_block = splitter.split(input_path)
            vectorizer.add_nodes_recursively(
                input_block,
                collection_name,
                input_path.name,
            )

    total_files = sum(1 for entry in source_dir.rglob("*") if not entry.is_dir())

    if added_to:
        heading = (
            f"\nAdded to [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f" Embedding Model: [green]{model_name}[/green]\n"
        )
    else:
        heading = (
            f"\nCreated [bold salmon1]{collection_name}[/bold salmon1]:\n"
            f" Embedding Model: '{model_name}'\n"
        )
    # Printed through the Rich console so the markup is rendered; the builtin
    # print would emit the "[bold ...]" tags literally.
    console.print(
        heading + f" Input Directory: {source_dir.absolute()}\n"
        f" {input_lang} [green]{suffixes}[/green] Files: "
        f"{len(input_paths)}\n"
        " Other Files (skipped): "
        f"{total_files - len(input_paths)}\n"
    )

    with open(collections_config_file, "w") as f:
        json.dump(vectorizer.config, f, indent=2)
223
+
224
+
225
@db.command(
    "rm",
    help="Remove a collection from the database.",
)
def db_rm(
    collection_name: Annotated[str, typer.Argument(help="The name of the collection.")],
    confirm: Annotated[
        bool,
        typer.Option(
            "--yes",
            "-y",
            help="Confirm the removal of the collection.",
        ),
    ] = False,
) -> None:
    """Remove a collection from the database

    Arguments:
        collection_name: The name of the collection to remove
        confirm: Skip the interactive confirmation prompt. Defaults to False so
            the flag is optional; without a default the option is required and
            the prompt below can never be reached.

    Raises:
        typer.Abort: If the user declines the confirmation prompt
    """
    from rich import print
    from rich.prompt import Confirm

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    if not confirm:
        confirm = Confirm.ask(
            f"\nAre you sure you want to [bold red]remove[/bold red] "
            f"[bold salmon1]{collection_name}[/bold salmon1]?",
        )
    if not confirm:
        raise typer.Abort()

    # Named `database` so the module-level Typer app `db` is not shadowed.
    database = ChromaEmbeddingDatabase(db_loc)
    collections = Collections(database)
    collections.delete(collection_name)
    # Rich's print is imported above so the markup is rendered, not shown raw.
    print(
        f"[bold red]Removed[/bold red] collection "
        f"[bold salmon1]{collection_name}[/bold salmon1]"
    )
266
+
267
+
268
def _check_collection(collection_name: str, input_dir: str | Path) -> bool:
    """Report whether a collection already exists in the current database

    Arguments:
        collection_name: The name of the collection to look up
        input_dir: Currently unused; accepted for the callers' convenience

    Returns:
        True if the lookup succeeds, False if it raises
        ``InvalidCollectionException``
    """
    from chromadb.errors import InvalidCollectionException

    from janus.embedding.collections import Collections
    from janus.embedding.database import ChromaEmbeddingDatabase

    known = Collections(ChromaEmbeddingDatabase(db_loc))
    try:
        known.get(collection_name)
    except InvalidCollectionException:
        return False
    return True
janus/cli/diagram.py ADDED
@@ -0,0 +1,207 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import click
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from janus.cli.constants import REFINERS
9
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
10
+ from janus.utils.enums import LANGUAGES
11
+
12
+
13
# CLI entry point: build a DiagramGenerator from the command-line options and
# run it over ``input_dir``, writing results to ``output_dir``.
#
# NOTE(review): documented with comments rather than a docstring because Typer
# uses a command function's docstring as its help text when the registration
# (not visible in this module) passes no explicit ``help=``.
def diagram(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    diagram_type: Annotated[
        str,
        typer.Option(
            "--diagram-type", "-dg", help="Diagram type to generate in PLANTUML"
        ),
    ] = "Activity",
    add_documentation: Annotated[
        bool,
        typer.Option(
            "--add-documentation/--no-documentation",
            "-ad",
            help="Whether to use documentation in generation",
        ),
    ] = False,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit concatenation instead of a backslash continuation, which
            # embedded the source indentation in the help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["CodeFormatRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
    extract_variables: Annotated[
        bool,
        typer.Option(
            "-ev",
            "--extract-variables",
            help="Present when diagram generator should "
            "extract variables before producing diagram",
        ),
    ] = False,
    use_janus_inputs: Annotated[
        bool,
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present when diagram generator should "
            "be using janus files as inputs",
        ),
    ] = False,
):
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.diagram import DiagramGenerator

    # Map the refiner names chosen on the command line to their REFINERS entries.
    refiners = [REFINERS[name] for name in refiner_types]
    model_arguments = {"temperature": temperature}
    collections_config = get_collections_config()

    diagram_generator = DiagramGenerator(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiners,
        retriever_type=retriever_type,
        diagram_type=diagram_type,
        add_documentation=add_documentation,
        extract_variables=extract_variables,
        use_janus_inputs=use_janus_inputs,
    )
    diagram_generator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
166
+
167
+
168
# CLI entry point: for every ``*.json`` file under ``input_dir``, write each
# string found in its (possibly nested) "outputs" lists to a temporary text
# file, run the PlantUML jar on it, and delete the text file afterwards.
#
# NOTE(review): documented with comments rather than a docstring because Typer
# uses a command function's docstring as its help text when the registration
# (not visible in this module) passes no explicit ``help=``.
def render(
    input_dir: Annotated[
        str,
        typer.Option(
            "--input",
            "-i",
        ),
    ],
    output_dir: Annotated[str, typer.Option("--output", "-o")],
):
    import json
    import subprocess  # nosec

    from janus.cli.constants import homedir

    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    # Loop-invariant, so resolved once rather than per rendered diagram.
    jar_path = homedir / ".janus/lib/plantuml.jar"

    def _render(obj, output_file, ind=0):
        # Walk obj["outputs"] depth-first and return the next unused index.
        for o in obj["outputs"]:
            if isinstance(o, dict):
                # The recursive call already returns an absolute index, so it
                # is assigned; adding it to `ind` would skip numbers.
                ind = _render(o, output_file, ind)
            else:
                outfile_new = output_file.with_stem(f"{output_file.stem}_{ind}")
                text = o.replace("\\n", "\n").strip()
                outfile_new.write_text(text)
                subprocess.run(["java", "-jar", jar_path, outfile_new])  # nosec
                outfile_new.unlink()
                ind += 1
        return ind

    for input_file in input_dir.rglob("*.json"):
        with open(input_file, "r") as f:
            data = json.load(f)

        output_file = output_dir / input_file.relative_to(input_dir).with_suffix(".txt")
        # rglob descends into subdirectories, so the mirrored output directory
        # may be several levels deep or not exist at all yet.
        output_file.parent.mkdir(parents=True, exist_ok=True)

        _render(data, output_file)
janus/cli/document.py ADDED
@@ -0,0 +1,183 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import click
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from janus.cli.constants import REFINERS
9
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
10
+ from janus.utils.enums import LANGUAGES
11
+
12
+
13
# CLI entry point: pick a documenter class from ``doc_mode``, configure it from
# the command-line options, and run it over ``input_dir``, writing results to
# ``output_dir``.
#
# NOTE(review): documented with comments rather than a docstring because Typer
# uses a command function's docstring as its help text when the registration
# (not visible in this module) passes no explicit ``help=``.
def document(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during documentation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    doc_mode: Annotated[
        str,
        typer.Option(
            "--doc-mode",
            "-d",
            help="The documentation mode.",
            click_type=click.Choice(["cloze", "summary", "multidoc", "requirements"]),
        ),
    ] = "cloze",
    comments_per_request: Annotated[
        Optional[int],
        typer.Option(
            "--comments-per-request",
            "-rc",
            help="The maximum number of comments to generate per request when using "
            "Cloze documentation mode.",
        ),
    ] = None,
    drop_comments: Annotated[
        bool,
        typer.Option(
            "--drop-comments/--keep-comments",
            help="Whether to drop or keep comments in the code sent to the LLM",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit concatenation instead of a backslash continuation, which
            # embedded the source indentation in the help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
    use_janus_inputs: Annotated[
        bool,
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if converter should use janus files as inputs",
        ),
    ] = False,
):
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
    from janus.converter.requirements import RequirementsDocumenter

    # Map the refiner names chosen on the command line to their REFINERS entries.
    refiners = [REFINERS[name] for name in refiner_types]
    model_arguments = {"temperature": temperature}
    collections_config = get_collections_config()

    # Options shared by every documenter class.
    kwargs = dict(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
        refiner_types=refiners,
        retriever_type=retriever_type,
        use_janus_inputs=use_janus_inputs,
    )

    if doc_mode == "cloze":
        documenter = ClozeDocumenter(comments_per_request=comments_per_request, **kwargs)
    elif doc_mode == "multidoc":
        documenter = MultiDocumenter(drop_comments=drop_comments, **kwargs)
    elif doc_mode == "requirements":
        documenter = RequirementsDocumenter(drop_comments=drop_comments, **kwargs)
    else:
        # Any other mode ("summary" among the declared choices) uses the base
        # Documenter.
        documenter = Documenter(drop_comments=drop_comments, **kwargs)

    documenter.translate(input_dir, output_dir, failure_dir, overwrite, collection)