janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. janus/__init__.py +1 -1
  2. janus/__main__.py +1 -1
  3. janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
  4. janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
  5. janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
  6. janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
  7. janus/_tests/test_cli.py +3 -2
  8. janus/cli/aggregate.py +135 -0
  9. janus/cli/cli.py +111 -0
  10. janus/cli/constants.py +43 -0
  11. janus/cli/database.py +289 -0
  12. janus/cli/diagram.py +178 -0
  13. janus/cli/document.py +174 -0
  14. janus/cli/embedding.py +122 -0
  15. janus/cli/llm.py +187 -0
  16. janus/cli/partition.py +125 -0
  17. janus/cli/self_eval.py +149 -0
  18. janus/cli/translate.py +183 -0
  19. janus/converter/__init__.py +1 -1
  20. janus/converter/_tests/test_translate.py +2 -0
  21. janus/converter/converter.py +129 -92
  22. janus/converter/document.py +21 -14
  23. janus/converter/evaluate.py +237 -4
  24. janus/converter/translate.py +3 -3
  25. janus/embedding/collections.py +1 -1
  26. janus/language/alc/_tests/alc.asm +3779 -0
  27. janus/language/alc/_tests/test_alc.py +1 -1
  28. janus/language/alc/alc.py +9 -4
  29. janus/language/binary/_tests/hello.bin +0 -0
  30. janus/language/block.py +47 -12
  31. janus/language/file.py +1 -1
  32. janus/language/mumps/_tests/mumps.m +235 -0
  33. janus/language/splitter.py +31 -23
  34. janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
  35. janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
  36. janus/language/treesitter/_tests/languages/matlab.m +225 -0
  37. janus/language/treesitter/treesitter.py +9 -1
  38. janus/llm/models_info.py +26 -13
  39. janus/metrics/_tests/asm_test_file.asm +10 -0
  40. janus/metrics/_tests/mumps_test_file.m +6 -0
  41. janus/metrics/_tests/test_treesitter_metrics.py +1 -1
  42. janus/metrics/prompts/clarity.txt +8 -0
  43. janus/metrics/prompts/completeness.txt +16 -0
  44. janus/metrics/prompts/faithfulness.txt +10 -0
  45. janus/metrics/prompts/hallucination.txt +16 -0
  46. janus/metrics/prompts/quality.txt +8 -0
  47. janus/metrics/prompts/readability.txt +16 -0
  48. janus/metrics/prompts/usefulness.txt +16 -0
  49. janus/parsers/code_parser.py +4 -4
  50. janus/parsers/doc_parser.py +12 -9
  51. janus/parsers/eval_parsers/incose_parser.py +134 -0
  52. janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
  53. janus/parsers/parser.py +7 -0
  54. janus/parsers/partition_parser.py +47 -13
  55. janus/parsers/reqs_parser.py +8 -5
  56. janus/parsers/uml.py +5 -4
  57. janus/prompts/prompt.py +2 -2
  58. janus/prompts/templates/README.md +30 -0
  59. janus/prompts/templates/basic_aggregation/human.txt +6 -0
  60. janus/prompts/templates/basic_aggregation/system.txt +1 -0
  61. janus/prompts/templates/basic_refinement/human.txt +14 -0
  62. janus/prompts/templates/basic_refinement/system.txt +1 -0
  63. janus/prompts/templates/diagram/human.txt +9 -0
  64. janus/prompts/templates/diagram/system.txt +1 -0
  65. janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
  66. janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
  67. janus/prompts/templates/document/human.txt +10 -0
  68. janus/prompts/templates/document/system.txt +1 -0
  69. janus/prompts/templates/document_cloze/human.txt +11 -0
  70. janus/prompts/templates/document_cloze/system.txt +1 -0
  71. janus/prompts/templates/document_cloze/variables.json +4 -0
  72. janus/prompts/templates/document_cloze/variables_asm.json +4 -0
  73. janus/prompts/templates/document_inline/human.txt +13 -0
  74. janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
  75. janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
  76. janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
  77. janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
  78. janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
  79. janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
  80. janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
  81. janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
  82. janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
  83. janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
  84. janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
  85. janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
  86. janus/prompts/templates/multidocument/human.txt +15 -0
  87. janus/prompts/templates/multidocument/system.txt +1 -0
  88. janus/prompts/templates/partition/human.txt +22 -0
  89. janus/prompts/templates/partition/system.txt +1 -0
  90. janus/prompts/templates/partition/variables.json +4 -0
  91. janus/prompts/templates/pseudocode/human.txt +7 -0
  92. janus/prompts/templates/pseudocode/system.txt +7 -0
  93. janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
  94. janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
  95. janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
  96. janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
  97. janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
  98. janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
  99. janus/prompts/templates/refinement/hallucination/human.txt +13 -0
  100. janus/prompts/templates/refinement/hallucination/system.txt +1 -0
  101. janus/prompts/templates/refinement/reflection/human.txt +15 -0
  102. janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
  103. janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
  104. janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
  105. janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
  106. janus/prompts/templates/refinement/reflection/system.txt +1 -0
  107. janus/prompts/templates/refinement/revision/human.txt +16 -0
  108. janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
  109. janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
  110. janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
  111. janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
  112. janus/prompts/templates/refinement/revision/system.txt +1 -0
  113. janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
  114. janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
  115. janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
  116. janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
  117. janus/prompts/templates/requirements/human.txt +13 -0
  118. janus/prompts/templates/requirements/system.txt +2 -0
  119. janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
  120. janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
  121. janus/prompts/templates/simple/human.txt +16 -0
  122. janus/prompts/templates/simple/system.txt +3 -0
  123. janus/refiners/format.py +49 -0
  124. janus/refiners/refiner.py +143 -4
  125. janus/utils/enums.py +140 -111
  126. janus/utils/logger.py +2 -0
  127. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/METADATA +7 -7
  128. janus_llm-4.3.5.dist-info/RECORD +210 -0
  129. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/WHEEL +1 -1
  130. janus_llm-4.3.5.dist-info/entry_points.txt +3 -0
  131. janus/cli.py +0 -1343
  132. janus_llm-4.2.0.dist-info/RECORD +0 -113
  133. janus_llm-4.2.0.dist-info/entry_points.txt +0 -3
  134. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/LICENSE +0 -0
janus/__init__.py CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
5
5
  from janus.converter.translate import Translator
6
6
  from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "4.2.0"
8
+ __version__ = "4.3.5"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
janus/__main__.py CHANGED
@@ -1,4 +1,4 @@
1
- from janus.cli import app
1
+ from janus.cli.cli import app
2
2
 
3
3
  if __name__ == "__main__":
4
4
  app()
@@ -0,0 +1,85 @@
1
+
2
+
3
+ # LLM Self Eval
4
+
5
+ The `llm-self-eval` command leverages the converter class to perform evaluations on generated outputs from large language models.
6
+
7
+ The input directory structure will be maintained in the output evaluations. The input files should contain an array of items to be evaluated. Each item will be evaluated individually based on the corresponding prompt. Set evaluations can be performed as well, but please note that each file to be evaluated must contain only one array with the items to be evaluated.
8
+
9
+
10
+ ## Evaluation Types
11
+
12
+ | Evaluation Type | Status | Command |
13
+ | ----------- | ----------- | ----------- |
14
+ | Incose (Requirements) | Complete | ```-e "incose"```
15
+ | Inline Comments | Complete | ```-e "comments"```
16
+
17
+ ## How to Run
18
+
19
+ Structure:
20
+ ```
21
+ janus llm-self-eval -i {path/to/input/files} -l json -o {path/to/output/files} -e {evaluator_type} -rc {# of items to evaluate at a time}
22
+
23
+ ```
24
+ | Flag | Meaning | Example |
25
+ | ----------- | ----------- | ----------- |
26
+ | `-i` | Input Directory | ```"path/to/input/files"```
27
+ | `-o` | Output Directory | ```"path/to/output/files"```
28
+ | `-l` | Evaluation language | `json`
29
+ | `-e` | Evaluation Type | ```"incose"```
30
+ | `-rc` | Number of lines to evaluate at a time (recommended: 5-10) | `5`
31
+
32
+ Example test command:
33
+ ```
34
+ janus llm-self-eval -i _tests/evaluator_tests/incose_tests/ -l json -o EvalOutput/incose/ -e "incose" -rc 5
35
+ ```
36
+ or for an inline comment test
37
+ ```
38
+ janus llm-self-eval -i _tests/evaluator_tests/inline_comment_tests/ -l 'mumps' -o EvalOutput/comments/ -e "comments"
39
+ ```
40
+
41
+ ## Adding an Evaluation Type
42
+ #### 1. Create a Parser
43
+
44
+ - In ```janus/parsers/eval_parsers```
45
+ - See `janus/janus/parsers/eval_parsers/incose_parser.py` for reference
46
+
47
+ #### 2. Create the Prompt
48
+
49
+ - In ```janus/prompts/templates/eval_prompts```
50
+ - Create a new directory with the name of your evaluation type. **Note: The name that you use is how you will call the evaluation type, e.g.,** ```-e "incose"```
51
+ - `human.txt` - this is the main prompt that contains the scoring rubric and the commands for the llm to follow
52
+ - `system.txt` - the initial instructions for the evaluating llm
53
+ - `variables.json` - contains the structure for what the returned evaluation should look like
54
+
55
+
56
+ ## Example Inputs
57
+
58
+ ```
59
+ {
60
+ "code": "DFHEISTG DSECT\nAPPLID DS CL08 CICS Applid\nSYSID DS CL04 CICS SYSID\n*\n***********************************************************************\n* Dynamic Storage Area (End) *\n***********************************************************************\n*\n***********************************************************************\n* DFHCOMMAREA *\n***********************************************************************\n*",
61
+ "requirements": [
62
+ "981273129",
63
+ "Testing for the CACBAR constant shall verify that it is recognized by the software as indicating the beginning of a DFHCOMMAREA and that it has been assigned a numeric value of 12 and that it is secure and that it is understandable and the testing must have 100% coverage and it must return useful content to a mixed audience.",
64
+ "The MS_WAIT field shall be a fullword integer (4 bytes) designed to store a wait time ranging from 1 millisecond to 900 milliseconds.",
65
+ "The software shall consider the DFHEISTG data structure as the end of a dynamic storage area.",
66
+ "The software shall recognize a constant named CACBAR, which is equivalent to the numeric value 12, indicating the beginning of a DFHCOMMAREA from the requesting program.",
67
+ "Hi there!",
68
+ "Limitations: The provided code snippet does not specify operations or methods for interacting with the DFHEISTG data structure or the DFHCOMMAREA. The functionality related to these structures must be defined in additional requirements or specifications not provided in the code snippet.",
69
+ "Testing of the DFHEISTG structure shall verify that the STIMERID field can store a 4-byte character string and that the MS_WAIT field can store an integer value within the range of 1 to 900. Additionally, testing should confirm that these fields are part of the defined dynamic storage area.",
70
+ "Testing for the CACBAR constant shall verify that it is recognized by the software as indicating the beginning of a DFHCOMMAREA and that it has been assigned a numeric value of 12.",
71
+ "The software requirements specification document must acknowledge areas where the code's functionality is not completely specified, such as the absence of detailed operations for the DFHEISTG structure or the contents and structure of the DFHCOMMAREA, and note these as areas for further clarification.",
72
+ "781",
73
+ "It worked!"
74
+ ]
75
+ }
76
+ ```
77
+
78
+ ## Preprocessing Scripts
79
+ Scripts can be found under `janus/scripts/preprocessing/self-eval/`
80
+
81
+ | Eval | Script | Description |
82
+ | ----------- | ----------- | ----------- |
83
+ | Inline Comments | `append_comments.py` | `In`: Directory to json files that have 'code' + 'comments' + "file ending" `Out`: Same directory structure in the output directory, with files of a given language that have comments appended.
84
+ | Inline Comments | `split_processed_comments.py` | `In`: Path to processed.json file that have 'experiments' + 'generated_comment_texts' `Out`: 'experiments' as file names, with each 'processed' + 'generated_comment_texts' pair split into json.
85
+ | Incose | `split_processed_reqiurements.py` |`In`: Directory to JSON files with multiple 'code' str and 'requirement' array `Out`: Same directory structure in the output directory, with individual 'code' + 'requirement' pairs.
@@ -0,0 +1,39 @@
1
+ {
2
+ "code": "DFHEISTG DSECT\nSTIMERID DS CL04 STIMERM ID\nMS_WAIT DS F Fullword 01ms to 900ms WAIT\n*\n***********************************************************************\n* Dynamic Storage Area (end ) *\n***********************************************************************\n*\n***********************************************************************\n* DFHCOMMAREA from requesting program (start) *\n***********************************************************************\nCACBAR EQU 12",
3
+ "requirements": [
4
+ "1. 981273129",
5
+ "2. Testing for the CACBAR constant shall verify that it is recognized by the software as indicating the beginning of a DFHCOMMAREA and that it has been assigned a numeric value of 12 and that it is secure and that it is understandable and the testing must have 100% coverage and it must return useful content to a mixed audience.",
6
+ "3. The MS_WAIT field shall be a fullword integer (4 bytes) designed to store a wait time ranging from 1 millisecond to 900 milliseconds.",
7
+ "4. The software shall consider the DFHEISTG data structure as the end of a dynamic storage area.",
8
+ "5. The software shall recognize a constant named CACBAR, which is equivalent to the numeric value 12, indicating the beginning of a DFHCOMMAREA from the requesting program.",
9
+ "6. Hi there!",
10
+ "7. Limitations: The provided code snippet does not specify operations or methods for interacting with the DFHEISTG data structure or the DFHCOMMAREA. The functionality related to these structures must be defined in additional requirements or specifications not provided in the code snippet.",
11
+ "8. Testing of the DFHEISTG structure shall verify that the STIMERID field can store a 4-byte character string and that the MS_WAIT field can store an integer value within the range of 1 to 900. Additionally, testing should confirm that these fields are part of the defined dynamic storage area.",
12
+ "9. Testing for the CACBAR constant shall verify that it is recognized by the software as indicating the beginning of a DFHCOMMAREA and that it has been assigned a numeric value of 12.",
13
+ "10. The software requirements specification document must acknowledge areas where the code's functionality is not completely specified, such as the absence of detailed operations for the DFHEISTG structure or the contents and structure of the DFHCOMMAREA, and note these as areas for further clarification.",
14
+ "11. 781",
15
+ "12. helloworld",
16
+ "3. The MS_WAIT field shall be a fullword integer (4 bytes) designed to store a wait time ranging from 1 millisecond to 900 milliseconds.",
17
+ "4. The software shall consider the DFHEISTG data structure as the end of a dynamic storage area.",
18
+ "5. The software shall recognize a constant named CACBAR, which is equivalent to the numeric value 12, indicating the beginning of a DFHCOMMAREA from the requesting program.",
19
+ "6. Hi there!",
20
+ "7. Limitations: The provided code snippet does not specify operations or methods for interacting with the DFHEISTG data structure or the DFHCOMMAREA. The functionality related to these structures must be defined in additional requirements or specifications not provided in the code snippet.",
21
+ "8. Testing of the DFHEISTG structure shall verify that the STIMERID field can store a 4-byte character string and that the MS_WAIT field can store an integer value within the range of 1 to 900. Additionally, testing should confirm that these fields are part of the defined dynamic storage area.",
22
+ "9. Testing for the CACBAR constant shall verify that it is recognized by the software as indicating the beginning of a DFHCOMMAREA and that it has been assigned a numeric value of 12.",
23
+ "10. The software requirements specification document must acknowledge areas where the code's functionality is not completely specified, such as the absence of detailed operations for the DFHEISTG structure or the contents and structure of the DFHCOMMAREA, and note these as areas for further clarification.",
24
+ "11. 781",
25
+ "12. helloworld",
26
+ "3. The MS_WAIT field shall be a fullword integer (4 bytes) designed to store a wait time ranging from 1 millisecond to 900 milliseconds.",
27
+ "4. The software shall consider the DFHEISTG data structure as the end of a dynamic storage area.",
28
+ "5. The software shall recognize a constant named CACBAR, which is equivalent to the numeric value 12, indicating the beginning of a DFHCOMMAREA from the requesting program.",
29
+ "6. Hi there!",
30
+ "7. Limitations: The provided code snippet does not specify operations or methods for interacting with the DFHEISTG data structure or the DFHCOMMAREA. The functionality related to these structures must be defined in additional requirements or specifications not provided in the code snippet.",
31
+ "8. Testing of the DFHEISTG structure shall verify that the STIMERID field can store a 4-byte character string and that the MS_WAIT field can store an integer value within the range of 1 to 900. Additionally, testing should confirm that these fields are part of the defined dynamic storage area.",
32
+ "9. Testing for the CACBAR constant shall verify that it is recognized by the software as indicating the beginning of a DFHCOMMAREA and that it has been assigned a numeric value of 12.",
33
+ "10. The software requirements specification document must acknowledge areas where the code's functionality is not completely specified, such as the absence of detailed operations for the DFHEISTG structure or the contents and structure of the DFHCOMMAREA, and note these as areas for further clarification.",
34
+ "11. 781",
35
+ "12. helloworld",
36
+ "13. The software shall provide audit logs that track all changes to critical system settings.",
37
+ "14. If it reaches me, it worked!"
38
+ ]
39
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "code": "DFHEISTG DSECT\nAPPLID DS CL08 CICS Applid\nSYSID DS CL04 CICS SYSID\n*\n***********************************************************************\n* Dynamic Storage Area (End) *\n***********************************************************************\n*\n***********************************************************************\n* DFHCOMMAREA *\n***********************************************************************\n*",
3
+ "requirements": [
4
+ "981273129",
5
+ "Testing for the CACBAR constant shall verify that it is recognized by the software as indicating the beginning of a DFHCOMMAREA and that it has been assigned a numeric value of 12 and that it is secure and that it is understandable and the testing must have 100% coverage and it must return useful content to a mixed audience.",
6
+ "The MS_WAIT field shall be a fullword integer (4 bytes) designed to store a wait time ranging from 1 millisecond to 900 milliseconds.",
7
+ "The software shall consider the DFHEISTG data structure as the end of a dynamic storage area.",
8
+ "The software shall recognize a constant named CACBAR, which is equivalent to the numeric value 12, indicating the beginning of a DFHCOMMAREA from the requesting program.",
9
+ "Hi there!",
10
+ "Limitations: The provided code snippet does not specify operations or methods for interacting with the DFHEISTG data structure or the DFHCOMMAREA. The functionality related to these structures must be defined in additional requirements or specifications not provided in the code snippet.",
11
+ "Testing of the DFHEISTG structure shall verify that the STIMERID field can store a 4-byte character string and that the MS_WAIT field can store an integer value within the range of 1 to 900. Additionally, testing should confirm that these fields are part of the defined dynamic storage area.",
12
+ "Testing for the CACBAR constant shall verify that it is recognized by the software as indicating the beginning of a DFHCOMMAREA and that it has been assigned a numeric value of 12.",
13
+ "The software requirements specification document must acknowledge areas where the code's functionality is not completely specified, such as the absence of detailed operations for the DFHEISTG structure or the contents and structure of the DFHCOMMAREA, and note these as areas for further clarification.",
14
+ "781",
15
+ "It worked!"
16
+ ]
17
+ }
@@ -0,0 +1,71 @@
1
+ DGJ1INIP ; <BLOCK_COMMENT 68620e36> ALB/MRY - Post-install of DGJ v1.0 ;[ 11/21/01 13:20 pm] 1.0;Incomplete Records Tracking;;Jun 25, 2001 Update PACKAGE (#2) File fields not updated by the KIDS install. Used for documentation purposes.
2
+ ;
3
+ D BMES^XPDUTL(" ")
4
+ D MES^XPDUTL(" Updating PACKAGE File...")
5
+ ;
6
+ PKG ; <BLOCK_COMMENT c3f339c2> Retrieve 'DGJ' Package name Get Package IEN
7
+ D FIND^DIC(9.4,"","@;1","P","DGJ","","C","","","DGJARRY")
8
+ S (DGJI,DGJIEN)=0
9
+ F S DGJI=$O(DGJARRY("DILIST",DGJI)) Q:'DGJI D
10
+ . Q:$P($G(DGJARRY("DILIST",DGJI,0)),"^",2)'="DGJ"
11
+ . S DGJIEN=$P($G(DGJARRY("DILIST",DGJI,0)),"^",1)
12
+ I 'DGJIEN D G ABRT^DGJ1INIT
13
+ . D BMES^XPDUTL(" ")
14
+ . D MES^XPDUTL(" No Package entry defined - Cannot update!")
15
+ ;
16
+ UPD ; <BLOCK_COMMENT 13d585bb> - Update fields not updated by the KIDS install. fields: Short Description (#2); required field Description (#3) ; word processing field File (#6) ; multiple Fields ; multiple - Replace Short Description (#2) field.
17
+ K FDATA S FDATA(9.4,DGJIEN_",",2)="IRT"
18
+ D FILE^DIE("E","FDATA","ERR")
19
+ D BMES^XPDUTL(" ")
20
+ D MES^XPDUTL(" SHORT DESCRIPTION field complete.")
21
+ K FDATA,ERR
22
+ ;
23
+ ; <BLOCK_COMMENT a544fe9b> - Replace Description (#3) field. Leave blank.
24
+ D WP^DIE(9.4,DGJIEN_",",3,"K","")
25
+ D BMES^XPDUTL(" ")
26
+ D MES^XPDUTL(" DESCRIPTION field complete.")
27
+ ;
28
+ FILE ; <BLOCK_COMMENT 78e9dfc2> - Replace File (#9.44) field. first remove (delete) existing fields.
29
+ N DIC K DGJARRY
30
+ D LIST^DIC(9.44,","_DGJIEN_",","@;.01","P","","","","","","","DGJARRY")
31
+ S DGJII=0
32
+ F S DGJII=$O(DGJARRY("DILIST",DGJII)) Q:'DGJII D
33
+ . S DGJFIEN=$P(DGJARRY("DILIST",DGJII,0),"^",1)
34
+ . S DIE="^DIC(9.4,",DA(1)=DGJIEN,DIE=DIE_DA(1)_",4,"
35
+ . S DA=+DGJFIEN,DR=".01///@" D ^DIE
36
+ ; <BLOCK_COMMENT 73a11ae6> after old entries deleted, add current File entries
37
+ F X="40.8","43","405","393","393.1","393.2","393.3","393.41" D
38
+ . S DIC="^DIC(9.4,",DA(1)=DGJIEN
39
+ . S DIC=DIC_DA(1)_",4,",DIC(0)="L",DIC("P")=$P(^DD(9.4,6,0),"^",2)
40
+ . D ^DIC
41
+ D BMES^XPDUTL(" ")
42
+ D MES^XPDUTL(" FILE field complete.")
43
+ ; <BLOCK_COMMENT 31af7bc2> - Add fields.
44
+ FLDS ; <BLOCK_COMMENT ba8fcbcf> Add FIELD entries to File entries #40.8, #43, #405. Add ASSIGN A VERSION NUMBER? entries to File entries #393-393.41.
45
+ S DIC="^DIC(9.4,",DA(1)=DGJIEN,DIC(0)="X"
46
+ S DIC=DIC_DA(1)_",4,",DIC("P")=$P(^DD(9.4,6,0),"^",2)
47
+ F X="40.8","43","405","393","393.1","393.2","393.3","393.41" D
48
+ . D ^DIC
49
+ . S DIE=DIC,DA=+Y
50
+ . I X="40.8" F FLD="100.01","100.02","100.03","100.04","100.05","100.06","100.07","100.08","100.09","100.1","100.2","100.3" S DR="2///"_FLD D ^DIE
51
+ . I X="43" F FLD="401","513" S DR="2///"_FLD D ^DIE
52
+ . I X=405 S FLD=60.01 S DR="2///"_FLD D ^DIE
53
+ . I X[393 S DR="222.2///Y" D ^DIE
54
+ D BMES^XPDUTL(" ")
55
+ D MES^XPDUTL(" FIELD field complete.")
56
+ ;
57
+ XPREF ; <BLOCK_COMMENT 03a34d10> Add 'DGJZ' entry to the EXCLUDED NAME SPACE (#919) field of DGJ.
58
+ S DIC="^DIC(9.4,",DA(1)=DGJIEN
59
+ S DIC=DIC_DA(1)_",""EX"",",DIC(0)="L",DIC("P")=$P(^DD(9.4,919,0),"^",2)
60
+ S X="DGJZ" D ^DIC
61
+ ;
62
+ ; <BLOCK_COMMENT 168906bd> Add 'DGJ' entry to the EXCLUDED NAME SPACE (#919) field of DG.
63
+ S DIC="^DIC(9.4,",DIC(0)="X",X="REGISTRATION"
64
+ D ^DIC I Y<0 D G ABRT^DGJ1INIT
65
+ . D BMES^XPDUTL("REGISTRATION PACKAGE HAS NOT BEEN FOUND")
66
+ . D MES^XPDUTL("CONTACT - PIMS National VISTA Support Team for assistance!")
67
+ S DA(1)=+Y,DIC=DIC_DA(1)_",""EX"",",DIC(0)="L",DIC("P")=$P(^DD(9.4,919,0),"^",2)
68
+ S X="DGJ" D ^DIC
69
+ ;
70
+ EXIT K DIC,DIE,DGJII,DGJFIEN,DGJIEN,X,DA,DGJARRY
71
+ Q
janus/_tests/test_cli.py CHANGED
@@ -4,7 +4,8 @@ from unittest.mock import ANY, patch
4
4
 
5
5
  from typer.testing import CliRunner
6
6
 
7
- from janus.cli import app, translate
7
+ from janus.cli.cli import app
8
+ from janus.cli.translate import translate
8
9
  from janus.embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
9
10
  from janus.llm.models_info import MODEL_CONFIG_DIR
10
11
 
@@ -125,4 +126,4 @@ class TestCli(unittest.TestCase):
125
126
 
126
127
  # Assert
127
128
  mock_translate.assert_called_once()
128
- mock_translate.assert_called_once_with(ANY, "janus/", "janus/", True, None)
129
+ mock_translate.assert_called_once_with(ANY, "janus/", "janus/", None, True, None)
janus/cli/aggregate.py ADDED
@@ -0,0 +1,135 @@
1
+ from pathlib import Path
2
+ from typing import List, Optional
3
+
4
+ import click
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from janus.cli.constants import get_subclasses
9
+ from janus.converter.converter import Converter
10
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
11
+ from janus.utils.enums import LANGUAGES
12
+
13
+
14
def aggregate(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    # The default is None, so the annotation must admit it.
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    # NOTE: the list default is only ever read (never mutated) in this function.
    intermediate_converters: Annotated[
        List[str],
        typer.Option(
            "-C",
            "--converter",
            help="Name of an intermediate converter to use",
            click_type=click.Choice([c.__name__ for c in get_subclasses(Converter)]),
        ),
    ] = ["Documenter"],
):
    """Run the named intermediate converters and aggregate their output.

    Each name in `intermediate_converters` is resolved to a `Converter`
    subclass and instantiated with the same model, language, prompt limit,
    database and splitter settings as the `Aggregator` that wraps them. The
    aggregator is built with the "basic_aggregation" prompt template and its
    `translate` method is invoked on the input directory.

    Arguments:
        input_dir: Directory containing the input source files.
        language: Language of the source code.
        output_dir: Directory the results are written to.
        llm_name: Name of the configured model to use.
        failure_dir: Optional directory for failure files.
        max_prompts: Maximum number of prompts per block.
        overwrite: Whether existing output files may be overwritten.
        temperature: Sampling temperature passed to the model.
        collection: Optional name of a Chroma DB collection for the results.
        splitter_type: Name of the custom splitter to use.
        intermediate_converters: Class names of the converters to run.
    """
    # Imported lazily, as in the rest of the CLI, to keep start-up cheap.
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.aggregator import Aggregator

    converter_classes = {c.__name__: c for c in get_subclasses(Converter)}

    # Settings shared by every intermediate converter and by the aggregator.
    shared_kwargs = dict(
        model=llm_name,
        model_arguments=dict(temperature=temperature),
        source_language=language,
        max_prompts=max_prompts,
        db_path=db_loc,
        db_config=get_collections_config(),
        splitter_type=splitter_type,
    )

    converters = [
        converter_classes[name](**shared_kwargs) for name in intermediate_converters
    ]

    aggregator = Aggregator(
        intermediate_converters=converters,
        prompt_template="basic_aggregation",
        **shared_kwargs,
    )
    aggregator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
janus/cli/cli.py ADDED
@@ -0,0 +1,111 @@
1
+ import logging
2
+
3
+ import typer
4
+ from rich import print
5
+
6
+ from janus.cli.aggregate import aggregate
7
+ from janus.cli.database import db
8
+ from janus.cli.diagram import diagram, render
9
+ from janus.cli.document import document
10
+ from janus.cli.embedding import embedding
11
+ from janus.cli.llm import llm
12
+ from janus.cli.partition import partition
13
+ from janus.cli.self_eval import llm_self_eval
14
+ from janus.cli.translate import translate
15
+ from janus.metrics.cli import evaluate
16
+ from janus.utils.logger import create_logger
17
+
18
+ httpx_logger = logging.getLogger("httpx")
19
+ httpx_logger.setLevel(logging.WARNING)
20
+
21
+ log = create_logger(__name__)
22
+
23
+
24
+ app = typer.Typer(
25
+ help=(
26
+ "[bold][dark_orange]Janus[/dark_orange] is a CLI for translating, "
27
+ "documenting, and diagramming code using large language models.[/bold]"
28
+ ),
29
+ add_completion=False,
30
+ no_args_is_help=True,
31
+ context_settings={"help_option_names": ["-h", "--help"]},
32
+ rich_markup_mode="rich",
33
+ )
34
+
35
+
36
def version_callback(value: bool) -> None:
    """Print the Janus version and exit when the version flag is set.

    Arguments:
        value: Whether the version option was supplied.

    Raises:
        typer.Exit: After the version has been printed, when `value` is truthy.
    """
    if not value:
        return

    from janus import __version__ as version

    print(f"Janus CLI [blue]v{version}[/blue]")
    raise typer.Exit()
42
+
43
+
44
@app.callback()
def common(
    ctx: typer.Context,
    version: bool = typer.Option(
        None,
        "--version",
        "-v",
        callback=version_callback,
        help="Print the version and exit.",
    ),
) -> None:
    """Application-level callback that exposes the version option.

    The function body does nothing itself; the `--version` / `-v` option is
    handled by `version_callback`, which prints the version and exits.

    Arguments:
        ctx: The typer context
        version: A boolean flag for the version
    """
64
+
65
+
66
+ aggregate = app.command(
67
+ help=(
68
+ "Aggregate intermediate representations together up to higher levels of "
69
+ "abstraction."
70
+ ),
71
+ no_args_is_help=True,
72
+ )(aggregate)
73
+
74
+ diagram = app.command(
75
+ help="Diagram input code using an LLM.",
76
+ no_args_is_help=True,
77
+ )(diagram)
78
+
79
+ document = app.command(
80
+ help="Document input code using an LLM.",
81
+ no_args_is_help=True,
82
+ )(document)
83
+
84
+ llm_self_eval = app.command(
85
+ help="Use an LLM to evaluate its own performance.",
86
+ no_args_is_help=True,
87
+ )(llm_self_eval)
88
+
89
+ partition = app.command(
90
+ help="Partition input code using an LLM.",
91
+ no_args_is_help=True,
92
+ )(partition)
93
+
94
+ render = app.command(
95
+ help="Render PlantUML from JSON output.",
96
+ no_args_is_help=True,
97
+ )(render)
98
+
99
+ translate = app.command(
100
+ help="Translate code from one language to another using an LLM.",
101
+ no_args_is_help=True,
102
+ )(translate)
103
+
104
+ app.add_typer(db, name="db")
105
+ app.add_typer(llm, name="llm")
106
+ app.add_typer(evaluate, name="evaluate")
107
+ app.add_typer(embedding, name="embedding")
108
+
109
+
110
+ if __name__ == "__main__":
111
+ app()
janus/cli/constants.py ADDED
@@ -0,0 +1,43 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import janus.refiners.format
5
+ import janus.refiners.refiner
6
+ import janus.refiners.uml
7
+
8
# Per-user Janus state is kept under ~/.janus.
homedir = Path.home().expanduser()

janus_dir = homedir / ".janus"
# exist_ok=True closes the window between an existence check and the mkdir
# call, so two processes starting at the same time cannot make one of them
# fail with FileExistsError.
janus_dir.mkdir(parents=True, exist_ok=True)

# ~/.janus/.db is a small pointer file whose text content is the database
# location; it is seeded with ~/.janus/chroma.db the first time through.
db_file = janus_dir / ".db"
if not db_file.exists():
    with open(db_file, "w") as f:
        f.write(str(janus_dir / "chroma.db"))

with open(db_file, "r") as f:
    db_loc = f.read()

# The collections configuration lives inside the database directory.
collections_config_file = Path(db_loc) / "collections.json"
23
+
24
+
25
def get_subclasses(cls):
    """Collect every direct and indirect subclass of a class.

    Arguments:
        cls: The class whose subclass tree is walked.

    Returns:
        A set of all classes reachable through `__subclasses__()`; `cls`
        itself is not included.
    """
    found = set()
    for child in cls.__subclasses__():
        found.add(child)
        # Recurse so grandchildren and deeper descendants are included too.
        found |= get_subclasses(child)
    return found
29
+
30
+
31
+ REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
32
+ {janus.refiners.refiner.JanusRefiner}
33
+ )
34
+ REFINERS = {r.__name__: r for r in REFINER_TYPES}
35
+
36
+
37
def get_collections_config():
    """Load the collections configuration from `collections_config_file`.

    Returns:
        The parsed JSON content of the file, or an empty dict when the file
        does not exist.
    """
    if not collections_config_file.exists():
        return {}

    with open(collections_config_file, "r") as f:
        return json.load(f)