janus-llm 4.3.5__py3-none-any.whl → 4.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/cli/aggregate.py +2 -2
- janus/cli/cli.py +6 -0
- janus/cli/constants.py +6 -0
- janus/cli/diagram.py +36 -7
- janus/cli/document.py +10 -1
- janus/cli/llm.py +7 -3
- janus/cli/partition.py +10 -1
- janus/cli/pipeline.py +126 -0
- janus/cli/self_eval.py +10 -3
- janus/cli/translate.py +10 -1
- janus/converter/__init__.py +2 -0
- janus/converter/_tests/test_translate.py +6 -5
- janus/converter/chain.py +100 -0
- janus/converter/converter.py +467 -90
- janus/converter/diagram.py +12 -8
- janus/converter/document.py +17 -7
- janus/converter/evaluate.py +174 -147
- janus/converter/partition.py +6 -11
- janus/converter/passthrough.py +29 -0
- janus/converter/pool.py +74 -0
- janus/converter/requirements.py +7 -40
- janus/converter/translate.py +2 -58
- janus/language/_tests/test_combine.py +1 -0
- janus/language/block.py +115 -5
- janus/llm/model_callbacks.py +6 -0
- janus/llm/models_info.py +19 -0
- janus/metrics/_tests/test_reading.py +48 -4
- janus/metrics/_tests/test_rouge_score.py +5 -11
- janus/metrics/metric.py +47 -124
- janus/metrics/reading.py +48 -28
- janus/metrics/rouge_score.py +21 -34
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/eval_parsers/incose_parser.py +3 -3
- janus/parsers/reqs_parser.py +3 -3
- janus/prompts/templates/cyclic/human.txt +16 -0
- janus/prompts/templates/cyclic/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +1 -1
- janus/prompts/templates/extract_variables/human.txt +5 -0
- janus/prompts/templates/extract_variables/system.txt +1 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA +14 -15
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/RECORD +46 -40
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/WHEEL +1 -1
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/LICENSE +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
janus/__init__.py,sha256=
|
1
|
+
janus/__init__.py,sha256=XDTXq0kgLKa_aC6hDm1Zt_fmd5COyTAiSIAAre6lHkg,361
|
2
2
|
janus/__main__.py,sha256=Z1-R163-Ryac2kY1sKYfflqBl40nm-o54tlPjGiOP98,68
|
3
3
|
janus/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
janus/_tests/conftest.py,sha256=V7uW-oq3YbFiRPvrq15YoVVrA1n_83pjgiyTZ-IUGW8,963
|
@@ -7,28 +7,32 @@ janus/_tests/evaluator_tests/incose_tests/incose_large_test.json,sha256=_tiBnF_D
|
|
7
7
|
janus/_tests/evaluator_tests/incose_tests/incose_small_test.json,sha256=KHizvtrEvRLrBw25q34I-qonDyFUSUpHpOpFhCmKjAo,2557
|
8
8
|
janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m,sha256=i2U_YymEFxpX9w9_5wN_bV4ZHzk8THyVdvhH-2Xildk,3490
|
9
9
|
janus/_tests/test_cli.py,sha256=Y1Gx17fBNTQWJGmjPff8bZgg5AnUY0fMWAHzQ0qwi08,4316
|
10
|
-
janus/cli/aggregate.py,sha256=
|
11
|
-
janus/cli/cli.py,sha256
|
12
|
-
janus/cli/constants.py,sha256=
|
10
|
+
janus/cli/aggregate.py,sha256=MlK1ZHrCLcwp8Pl4sronWZaB5Pp6AkWiCtHMriOkLgQ,4139
|
11
|
+
janus/cli/cli.py,sha256=8n7Eo9-2-joVnCTDzrGu4RgemNtyuDaasaVGlhEiQXc,2820
|
12
|
+
janus/cli/constants.py,sha256=4mb3YW18Q8bWAfKa05IJbXUxzQrmN81ELL5pR37PtCw,1206
|
13
13
|
janus/cli/database.py,sha256=EkLv_fJNlG6oKvCcEzezdHIChaJlPUl3AgeejJU4rW0,9540
|
14
|
-
janus/cli/diagram.py,sha256=
|
15
|
-
janus/cli/document.py,sha256=
|
14
|
+
janus/cli/diagram.py,sha256=gOOneADH2klUzUx8CEM6qRtq2C5zDukzkF74R46DgWs,6254
|
15
|
+
janus/cli/document.py,sha256=3tp85dIZ6sF59v-fEE1BLzk9Hkv4tL5CuFtNWvVzhBQ,5685
|
16
16
|
janus/cli/embedding.py,sha256=WBNtcWPrXB4VfzKjeB_fOUm945gzgGjF2XbZkGltgYY,4215
|
17
|
-
janus/cli/llm.py,sha256=
|
18
|
-
janus/cli/partition.py,sha256=
|
19
|
-
janus/cli/
|
20
|
-
janus/cli/
|
21
|
-
janus/
|
17
|
+
janus/cli/llm.py,sha256=JfSPUfkGAQyg-chc18ZoBu8K8w0Q9OUbktHsG90wnsg,6537
|
18
|
+
janus/cli/partition.py,sha256=AHpB0mfhoQzcD0lAQFObbbD2Pw3KE5Hk0r_2XapT6Fg,3898
|
19
|
+
janus/cli/pipeline.py,sha256=nPQt0gUzJxW10v0rj1giaMxq6XPauU4Y8-a7WbLJu80,3820
|
20
|
+
janus/cli/self_eval.py,sha256=oQo3ks3GptKV3Sjcp67ILpLv7VrWP55naK-tv_ehkJc,4683
|
21
|
+
janus/cli/translate.py,sha256=Etob8z-SU0V8oEimUtYfPvYXHgLUP6pkf4IRDr4dbU0,5948
|
22
|
+
janus/converter/__init__.py,sha256=j30j1moH4sEzIZhDcy-_WrP5XGncRWXHfppPTJcan34,501
|
22
23
|
janus/converter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
-
janus/converter/_tests/test_translate.py,sha256=
|
24
|
+
janus/converter/_tests/test_translate.py,sha256=Sbhn-OjrKtB8CCtZz_CPjlEezEN3qwHcES1C-IeQtFc,5796
|
24
25
|
janus/converter/aggregator.py,sha256=MuAXMKmq6PuUo_w6ljyiuDn81Gk2dN-Ci7FVeLc6vhs,1966
|
25
|
-
janus/converter/
|
26
|
-
janus/converter/
|
27
|
-
janus/converter/
|
28
|
-
janus/converter/
|
29
|
-
janus/converter/
|
30
|
-
janus/converter/
|
31
|
-
janus/converter/
|
26
|
+
janus/converter/chain.py,sha256=XBNf2_S50SsZC8w-jMcGNyAmgYFOaphRVnazTzY7aq0,4138
|
27
|
+
janus/converter/converter.py,sha256=Zkvxs-Lu9_wXm4gfEODrttugGIbPv3DRq-Q88KZouE0,43418
|
28
|
+
janus/converter/diagram.py,sha256=eURspGgvVGyFioR1QC4EiD6jgss3uJemVpFN0IOQ59g,1898
|
29
|
+
janus/converter/document.py,sha256=aPvALPjbo37FBMrXEjtcuoIvtMTE4OYMXR9K9PVuieE,5477
|
30
|
+
janus/converter/evaluate.py,sha256=Sj0a0cKnHwMRjwQMtArwCzspMazkcFf76-r1nJDbDuo,11540
|
31
|
+
janus/converter/partition.py,sha256=FWLFyC_7BUzoG5Rr-r8ATrtgSmsJI5p4Ul1mZRtuMTY,737
|
32
|
+
janus/converter/passthrough.py,sha256=clVe3Cd00CaJncyCvoNL7JDE_rbe4jiYTSx4EC5ooQY,1116
|
33
|
+
janus/converter/pool.py,sha256=Lylx97MSKLibjDbFLZhZ0ZX1nTqjPTOEg90hLqkxplQ,2973
|
34
|
+
janus/converter/requirements.py,sha256=GWcyrOR0Q1XXKDKSq8u3nLZxcjPq5IxzWRyquoCv12M,781
|
35
|
+
janus/converter/translate.py,sha256=jwuhSWIzhjuMJEbHnZS9Y2D8FXhfs6W8PLfpiAeRE6M,1973
|
32
36
|
janus/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
37
|
janus/embedding/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
38
|
janus/embedding/_tests/test_collections.py,sha256=llg-JSuRRFhKkHFiWWSHEWV3iaT6Lwue0lp2tEml9io,2668
|
@@ -40,7 +44,7 @@ janus/embedding/embedding_models_info.py,sha256=ZV0Ve4_Zq_B_fnL3QohAQUHlRvQGg0mi
|
|
40
44
|
janus/embedding/vectorize.py,sha256=s5DWrWuH0M9qYbLl4oqvwZzZo-ISjGU8Inc9KERQkSc,6449
|
41
45
|
janus/language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
42
46
|
janus/language/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
43
|
-
janus/language/_tests/test_combine.py,sha256=
|
47
|
+
janus/language/_tests/test_combine.py,sha256=_v8X82Fby50y1fX2_vggV0XglxUUqiUQBkZXxglZjpM,1871
|
44
48
|
janus/language/_tests/test_splitter.py,sha256=Hqexa39LLEXlK3ZUw7Zot4PUIACvye2vkq0Jaox0T10,373
|
45
49
|
janus/language/alc/__init__.py,sha256=j7vOMGhT1Vri6p8dsjSaY-fkO5uFn0sJ0nrNGGvcizM,42
|
46
50
|
janus/language/alc/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -53,7 +57,7 @@ janus/language/binary/_tests/hello.bin,sha256=SRcqx55Vm9GhstlNSA1jDm0AJCCPhP0Xuq
|
|
53
57
|
janus/language/binary/_tests/test_binary.py,sha256=cIKIxjj6kIY3rcxLwqUPESP9bxWrHqMHx9TNuICgfeQ,1724
|
54
58
|
janus/language/binary/binary.py,sha256=PHsVa8jcM7sW9aTboGRWXj6ewQznz0kVPNWtP4B9YPU,6555
|
55
59
|
janus/language/binary/reveng/decompile_script.py,sha256=veW51oJzuO-4UD3Er062jXZ_FYtTFo9OCkl82Z2xr6A,2182
|
56
|
-
janus/language/block.py,sha256=
|
60
|
+
janus/language/block.py,sha256=wB6m-sbTaVGNAtwvCnbZam3EmShLt18roFSLqBSpBwE,14054
|
57
61
|
janus/language/combine.py,sha256=egZRl1xZXAFXa2ZjjfqnNckc9uxuo6e1MJgkRrCgvd8,3650
|
58
62
|
janus/language/file.py,sha256=dh2S3owuDOAKaV-GopLmdezmZ4U_Fyp2pLyGmEf0-QU,581
|
59
63
|
janus/language/mumps/__init__.py,sha256=-Ou_wJ-JgHezfp1dub2_qCYNiK9wO-zo2MlqxM9qiwE,48
|
@@ -78,8 +82,8 @@ janus/language/treesitter/_tests/languages/matlab.m,sha256=N29hh2dgjHh1kFHxeOMgS
|
|
78
82
|
janus/language/treesitter/_tests/test_treesitter.py,sha256=fmr_mFSja7vaCVu0TVyLDua3A94jMjY4AqSC5NqnOdQ,2179
|
79
83
|
janus/language/treesitter/treesitter.py,sha256=FdsBO8CEo6l9D77aHXns5jRSoZzkvrRGZFCW3oNw15c,7928
|
80
84
|
janus/llm/__init__.py,sha256=TKLYvnsWKWfxMucy-lCLQ-4bkN9ENotJZDywDEQmrKg,45
|
81
|
-
janus/llm/model_callbacks.py,sha256=
|
82
|
-
janus/llm/models_info.py,sha256=
|
85
|
+
janus/llm/model_callbacks.py,sha256=pNeCENgatzyLOPjgEXxcu7YaH-805VlqVs8wHIZ8W_k,8389
|
86
|
+
janus/llm/models_info.py,sha256=Knm43qEuyl0QjKfcNsaJkLdjFKpzaapi6Q7SjqBWGJ0,11831
|
83
87
|
janus/metrics/__init__.py,sha256=AsxtZJUzZiXJPr2ehPPltuYP-ddechjg6X85WZUO7mA,241
|
84
88
|
janus/metrics/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
85
89
|
janus/metrics/_tests/asm_test_file.asm,sha256=5QXVhXVCZKiaLO0I58yFKGAChcSC-8wOESUCBWcLbmM,231
|
@@ -89,9 +93,8 @@ janus/metrics/_tests/target.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE
|
|
89
93
|
janus/metrics/_tests/test_bleu.py,sha256=k9hSSdGMITsTiXs-Aj4Q5t1_p2v2eD0O9m-vcRi2GKo,1652
|
90
94
|
janus/metrics/_tests/test_chrf.py,sha256=I6AB323mSNj9_0dRIgQdslN0OcQH2fzYxdep68d1LZU,2214
|
91
95
|
janus/metrics/_tests/test_file_pairing.py,sha256=8qB9xaZlaWmc6oAqQGYreRZLrZmrlVALcgtCz0Zp0uA,1892
|
92
|
-
janus/metrics/_tests/
|
93
|
-
janus/metrics/_tests/
|
94
|
-
janus/metrics/_tests/test_rouge_score.py,sha256=dnP99nry-U5wyE-CiC0eQwm78IyScnmGQH3BeIEdmLY,2032
|
96
|
+
janus/metrics/_tests/test_reading.py,sha256=5RJRX0EmQG2ZPTLpM4pCAd0LnZ9dPgTZc-_nn8sRHFE,2301
|
97
|
+
janus/metrics/_tests/test_rouge_score.py,sha256=4Jo1jox4YORYNsctN2zcG2WWP565k-UkujyUEH-xLCM,1855
|
95
98
|
janus/metrics/_tests/test_similarity_score.py,sha256=tdzH_8hYb2h7fKxpd_a75di-GPnU_frZ0zn2aeYzkso,811
|
96
99
|
janus/metrics/_tests/test_treesitter_metrics.py,sha256=sp_IorshEHjkbsyNeuWMEs-KZKABB4n7A4XnTCySErM,4356
|
97
100
|
janus/metrics/bleu.py,sha256=UCrerEtEF5TadbY-vxbr5TQv67j8hrOCk8Q2C5OQXlw,1759
|
@@ -99,8 +102,7 @@ janus/metrics/chrf.py,sha256=c8AJLVUtzOezH0R7cJ9hwkLQCIHonm56TTYRTmYuBK4,1485
|
|
99
102
|
janus/metrics/cli.py,sha256=Duuw2RF47Z-t1pal0cg3L_-N_91rx29krirqtIwjYLY,157
|
100
103
|
janus/metrics/complexity_metrics.py,sha256=9sHxa9tY9IgdFC4grCmDSuu8Nkphm_FWbTecxgW78go,6574
|
101
104
|
janus/metrics/file_pairing.py,sha256=b0848gbDTqKVge-uGiujtHdsgcbkpJ04nWF2ul_gbCw,3749
|
102
|
-
janus/metrics/
|
103
|
-
janus/metrics/metric.py,sha256=ZQXz2LLXA3_WW6BluF8Q2LdJKYVWZLYmhO4Ai116ZPY,17168
|
105
|
+
janus/metrics/metric.py,sha256=CXnj6gM1LzMJ2QUPWc1N7bg3V36EdHlLo3Idu31o7E0,13709
|
104
106
|
janus/metrics/prompts/clarity.txt,sha256=UTo7a_E28a9JIto6wskQSUANPGdO6oPxf_3B2sqhgZw,328
|
105
107
|
janus/metrics/prompts/completeness.txt,sha256=5z4a_9a2ruToQnQiW6S02A_IYBIOgwg4KIWHXChA2Co,518
|
106
108
|
janus/metrics/prompts/faithfulness.txt,sha256=UdvfuW99_OsT7D7GhVQpG1StwnC3PFZwNbENS_Gev_c,460
|
@@ -108,21 +110,21 @@ janus/metrics/prompts/hallucination.txt,sha256=CX0MUxkWBE5Yxupg3ypdXykte9mwAuoGf
|
|
108
110
|
janus/metrics/prompts/quality.txt,sha256=JqjKboSd4ueyEE0sg21Y-V9bxxu2li8a9ooGTUf02Vk,328
|
109
111
|
janus/metrics/prompts/readability.txt,sha256=shKvNxUZKtdoKkiA24XzE9XWseSCQFZDfxOGhUDBd-0,419
|
110
112
|
janus/metrics/prompts/usefulness.txt,sha256=DvZxJAbaRZ9R0K5KqADTt4BaJ0WLgyC5mxQ66xhIsJw,520
|
111
|
-
janus/metrics/reading.py,sha256=
|
112
|
-
janus/metrics/rouge_score.py,sha256=
|
113
|
+
janus/metrics/reading.py,sha256=KL_Q4nvoX6KvtmNmkRjgVTPvcZZp-V7_z1240LIYU3U,3755
|
114
|
+
janus/metrics/rouge_score.py,sha256=mDJ-fyluAqvL5go68nbMIFOkrO_yOmPT6OUXf3ESVuc,2468
|
113
115
|
janus/metrics/similarity.py,sha256=NKd-m8hMB27i1_1CQkG5o7HrAK2wTr89B21Y36kqHiE,1630
|
114
116
|
janus/metrics/splitting.py,sha256=610ScHRvALwdkqA6YyGI-tr3a18_cUofldBxGYX0SwE,968
|
115
117
|
janus/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
116
118
|
janus/parsers/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
117
|
-
janus/parsers/_tests/test_code_parser.py,sha256=
|
118
|
-
janus/parsers/code_parser.py,sha256
|
119
|
+
janus/parsers/_tests/test_code_parser.py,sha256=I3IyTDUjucjeOOdwd3hUv5W8FK2owieFxTAwiMA2V4I,922
|
120
|
+
janus/parsers/code_parser.py,sha256=-eNEgycQBgrLUDhN9FyeCDwPexrpUFp3kK2wOG560AI,845
|
119
121
|
janus/parsers/doc_parser.py,sha256=XVlzyMWYB_UUwNp5OQ0pxA-KP0ITCZT0CjpQN_mHbWQ,5957
|
120
122
|
janus/parsers/eval_parser.py,sha256=Gjh6aTZgpYd2ASJUEPMo4LpCL00cBmbOqc4KM3hy8x8,2922
|
121
|
-
janus/parsers/eval_parsers/incose_parser.py,sha256=
|
123
|
+
janus/parsers/eval_parsers/incose_parser.py,sha256=xZii46LzaKxglSdhRL-FHHhQ8sdnA7QHZsW5n5N9P10,4458
|
122
124
|
janus/parsers/eval_parsers/inline_comment_parser.py,sha256=QzKgzeWPhyIEkLxJBpeutSocSJjjXEcWRRS635bXEO8,3973
|
123
125
|
janus/parsers/parser.py,sha256=YwkiFthesUaxHngrt4v2lb_HYgKpusnlVgTMwi3WttE,1874
|
124
126
|
janus/parsers/partition_parser.py,sha256=2l4fERb7lf-RTKBTi-TZBys0kJXlzRJWeB8RHNzyTDs,5834
|
125
|
-
janus/parsers/reqs_parser.py,sha256=
|
127
|
+
janus/parsers/reqs_parser.py,sha256=5kS3gtiofZLfnV4GaPEiSwxx3bAG4k-DBDc5wuGF6MQ,2523
|
126
128
|
janus/parsers/uml.py,sha256=iB9j0gtaT4Zo4tYFCGQNRzXa6--wDCuw-sLeQk19kfc,3429
|
127
129
|
janus/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
128
130
|
janus/prompts/prompt.py,sha256=JPr6wvIu1ZpUtTa93WJxa3fbosswtIJqqOY5H8nE2XA,10586
|
@@ -131,6 +133,8 @@ janus/prompts/templates/basic_aggregation/human.txt,sha256=Aw2Jip8rYHoA_vYXJ3ve8
|
|
131
133
|
janus/prompts/templates/basic_aggregation/system.txt,sha256=8QgUYtx51aPJk-e7kHLfXKPPp2Y1YYfBmZwoXG0l2GQ,126
|
132
134
|
janus/prompts/templates/basic_refinement/human.txt,sha256=d_Yr8KKVD_xXpO4RzYKQzG_4T_Zce3qAiWl_roAF2H4,342
|
133
135
|
janus/prompts/templates/basic_refinement/system.txt,sha256=odlLG_ZOqd9TyJJ6e4S-1b-fLusrxLO3pZBem7iR3hQ,115
|
136
|
+
janus/prompts/templates/cyclic/human.txt,sha256=8LF07WXF4uVzNB77RQugrkgnYd7mObYg5-dBxydn2NA,597
|
137
|
+
janus/prompts/templates/cyclic/system.txt,sha256=MLKpPIhg_q74K8V0MP1AnswT4nw9r-e_Qt66QYisE_w,209
|
134
138
|
janus/prompts/templates/diagram/human.txt,sha256=P5WJd-_TzUMlaUjIZSH4o7pUCadmqWoY6GvZ-lv35hU,491
|
135
139
|
janus/prompts/templates/diagram/system.txt,sha256=cGepMHpxI4PX9hTmH-mDsqKZDKxSqjKzMbbWpHOyUpE,121
|
136
140
|
janus/prompts/templates/diagram_with_documentation/human.txt,sha256=_WeV70QDN3vS-4RVqSeri_vfxDmF_A-XGGG0GMAocIk,673
|
@@ -142,12 +146,14 @@ janus/prompts/templates/document_cloze/system.txt,sha256=w1SUmICn6zuO2MVw_CoYVkD
|
|
142
146
|
janus/prompts/templates/document_cloze/variables.json,sha256=DB5N72JXk7AXKSI1kdoCbLHv2pbDJZmTWZkuWIq7G3Q,1647
|
143
147
|
janus/prompts/templates/document_cloze/variables_asm.json,sha256=5wDJIG8adxbUGPj1_ol7H5Yh1qqDlQiXJOeLHgkaaa8,1065
|
144
148
|
janus/prompts/templates/document_inline/human.txt,sha256=k-tfS4YitYUZNTP0lvcc5_m1ZfWopVNeSZyQR9nX4yw,311
|
145
|
-
janus/prompts/templates/eval_prompts/incose/human.txt,sha256=
|
149
|
+
janus/prompts/templates/eval_prompts/incose/human.txt,sha256=iWixpC3vZSQDqtC3e8N68SmxbJEWPb4DVz0gxRERZ20,3307
|
146
150
|
janus/prompts/templates/eval_prompts/incose/system.txt,sha256=aXj9BsUsH2DFXrltgHl7xzBGvnCsh034oWyDHXVaA3c,96
|
147
151
|
janus/prompts/templates/eval_prompts/incose/variables.json,sha256=c2gssz87ix-Sn5w5OL72TS22VE5T0Yd787L8qB0VW_w,3012
|
148
152
|
janus/prompts/templates/eval_prompts/inline_comments/human.txt,sha256=6DUkmzLOr-VC4FzWALSeC2q0NI8hmRhoh8ZeTeh7CB8,2280
|
149
153
|
janus/prompts/templates/eval_prompts/inline_comments/system.txt,sha256=VnKktVHYYMYrKroQpmG6EnykYO5qbosIvUFYZl14wsg,101
|
150
154
|
janus/prompts/templates/eval_prompts/inline_comments/variables.json,sha256=dpRv25tcV2xeDwki5C1qTmkU_nZC-0yWe1RVFpqz2Sc,2099
|
155
|
+
janus/prompts/templates/extract_variables/human.txt,sha256=EVqW6jAGsSA2ANgC8La_5Ij8VgHEt2gbwsjy1q4OYes,129
|
156
|
+
janus/prompts/templates/extract_variables/system.txt,sha256=yH9S3nqiiDSWSoBKlS0PPM3c6s4Ck1zTNGnQQpHN4V0,120
|
151
157
|
janus/prompts/templates/micromanaged_mumps_v1.0/human.txt,sha256=FCXlIlRYPnz8yFCFNIptlHjs5ZccoYO-xUay7VWph3s,1143
|
152
158
|
janus/prompts/templates/micromanaged_mumps_v1.0/system.txt,sha256=S5M82T21RGqG9TLnKdzW3596GQDumPrXnvWIJJMjwLU,133
|
153
159
|
janus/prompts/templates/micromanaged_mumps_v2.0/human.txt,sha256=aqVDRHzGBWzj1k-XhuNUcO5Yd4YCUdJbRJOJrbiw_nY,1433
|
@@ -203,8 +209,8 @@ janus/utils/enums.py,sha256=o26C6xIqYSiC2areAGSAB4alP_uHfpKxNn5TnlZzlTI,29656
|
|
203
209
|
janus/utils/logger.py,sha256=vKofNjSvnXduvfkAWf1kkEES32tI7iI-bojfjIa3XoQ,2466
|
204
210
|
janus/utils/pdf_docs_reader.py,sha256=beMKHdYrFwg0m_i7n0OTJrut3sf4rEWFd7P_80A76WY,5140
|
205
211
|
janus/utils/progress.py,sha256=PIpcQec7SrhsfqB25LHj2CDDkfm9umZx90d9LZnAx6k,1469
|
206
|
-
janus_llm-4.
|
207
|
-
janus_llm-4.
|
208
|
-
janus_llm-4.
|
209
|
-
janus_llm-4.
|
210
|
-
janus_llm-4.
|
212
|
+
janus_llm-4.5.4.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
|
213
|
+
janus_llm-4.5.4.dist-info/METADATA,sha256=Pd0QN52fxzsTdlvdQ0HID-uwbIrsgcX6xnB577YSxnE,4695
|
214
|
+
janus_llm-4.5.4.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
215
|
+
janus_llm-4.5.4.dist-info/entry_points.txt,sha256=rzOrXUSS0SNN8I-fKV0-IBoYLTCMXnZfMtu4HetlCkU,43
|
216
|
+
janus_llm-4.5.4.dist-info/RECORD,,
|
janus/metrics/_tests/test_llm.py
DELETED
@@ -1,90 +0,0 @@
|
|
1
|
-
import unittest
|
2
|
-
from unittest.mock import patch
|
3
|
-
|
4
|
-
import pytest
|
5
|
-
|
6
|
-
from janus.llm.models_info import load_model
|
7
|
-
from janus.metrics.llm_metrics import llm_evaluate_option, llm_evaluate_ref_option
|
8
|
-
|
9
|
-
|
10
|
-
class TestLLMMetrics(unittest.TestCase):
|
11
|
-
def setUp(self):
|
12
|
-
self.bad_code = """
|
13
|
-
if __name__ == "__main__":
|
14
|
-
a1, a2, b3, b4 = 0, [1, 2000, "a"], 2, (1, 2)
|
15
|
-
for a in a2:
|
16
|
-
if b3:
|
17
|
-
elif not b3:
|
18
|
-
try:
|
19
|
-
pass
|
20
|
-
except:
|
21
|
-
raise ValueError
|
22
|
-
elif 1:
|
23
|
-
print(1)
|
24
|
-
else:
|
25
|
-
print(b4[0])
|
26
|
-
for (x, y) in range(a1, b3):
|
27
|
-
for i in range(003300):
|
28
|
-
for z in a2:
|
29
|
-
printf(b4[2])
|
30
|
-
"""
|
31
|
-
self.impressive_code = """
|
32
|
-
# This program prints out Hello, world!
|
33
|
-
|
34
|
-
print('Hello, world!')
|
35
|
-
"""
|
36
|
-
self.impressive_code_reference = """
|
37
|
-
# An implementation of python Hello, world!
|
38
|
-
|
39
|
-
print("'Hello, world!")
|
40
|
-
"""
|
41
|
-
|
42
|
-
@patch(".llm.models_info.load_model")
|
43
|
-
@patch("janus.metrics.llm_metrics.llm_evaluate")
|
44
|
-
@pytest.mark.llm_eval
|
45
|
-
def test_llm_self_eval_quality(self, mock_llm_evaluate, mock_load_model):
|
46
|
-
"""Test that the quality llm self eval recognizes bad_code as bad code
|
47
|
-
(<5 on a scale of 1-10)"""
|
48
|
-
mock_llm_evaluate.return_value = 4 # return a value less than 5
|
49
|
-
mock_load_model.return_value = [None] # return a dummy model
|
50
|
-
|
51
|
-
bad_code_quality = llm_evaluate_option(
|
52
|
-
self.bad_code,
|
53
|
-
self.bad_code,
|
54
|
-
metric="quality",
|
55
|
-
language="python",
|
56
|
-
llm=load_model("gpt-4o")[0],
|
57
|
-
)
|
58
|
-
self.assertLess(bad_code_quality, 5)
|
59
|
-
|
60
|
-
mock_llm_evaluate.return_value = 6 # return a value greater than 5
|
61
|
-
impressive_code_quality = llm_evaluate_option(
|
62
|
-
self.impressive_code,
|
63
|
-
self.impressive_code,
|
64
|
-
metric="quality",
|
65
|
-
language="python",
|
66
|
-
llm=load_model("gpt-4o")[0],
|
67
|
-
)
|
68
|
-
self.assertGreater(impressive_code_quality, 5)
|
69
|
-
|
70
|
-
@patch("janus.llm.models_info.load_model")
|
71
|
-
@patch("janus.metrics.llm_metrics.llm_evaluate")
|
72
|
-
@pytest.mark.llm_eval
|
73
|
-
def test_llm_self_eval_faithfulness(self, mock_llm_evaluate, mock_load_model):
|
74
|
-
"""The two Hello, world! samples are more or less the same,
|
75
|
-
so the faithfulness score should be high"""
|
76
|
-
mock_llm_evaluate.return_value = 9 # return a high value
|
77
|
-
mock_load_model.return_value = [None] # return a dummy model
|
78
|
-
|
79
|
-
faithfulness = llm_evaluate_ref_option(
|
80
|
-
self.impressive_code,
|
81
|
-
self.impressive_code_reference,
|
82
|
-
metric="faithfulness",
|
83
|
-
language="python",
|
84
|
-
llm=load_model("gpt-4o")[0],
|
85
|
-
)
|
86
|
-
self.assertGreater(faithfulness, 8)
|
87
|
-
|
88
|
-
|
89
|
-
if __name__ == "__main__":
|
90
|
-
unittest.main()
|
janus/metrics/llm_metrics.py
DELETED
@@ -1,202 +0,0 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
from typing import Any
|
3
|
-
|
4
|
-
import click
|
5
|
-
import typer
|
6
|
-
from langchain_core.exceptions import OutputParserException
|
7
|
-
from langchain_core.output_parsers import BaseOutputParser, JsonOutputParser
|
8
|
-
from langchain_core.prompts import PromptTemplate
|
9
|
-
from langchain_core.pydantic_v1 import BaseModel, Field
|
10
|
-
from typing_extensions import Annotated
|
11
|
-
|
12
|
-
from janus.metrics.metric import metric
|
13
|
-
|
14
|
-
|
15
|
-
class LLMMetricOutput(BaseModel):
|
16
|
-
"""The output of an LLM evaluation metric."""
|
17
|
-
|
18
|
-
thought: str = Field(
|
19
|
-
...,
|
20
|
-
description=(
|
21
|
-
"The thought process that you took to reach your value determination."
|
22
|
-
),
|
23
|
-
)
|
24
|
-
value: str | float | int = Field(
|
25
|
-
..., description="The value of the metric described in the prompt."
|
26
|
-
)
|
27
|
-
|
28
|
-
|
29
|
-
def load_prompt(path: Path, language: str, parser: BaseOutputParser) -> PromptTemplate:
|
30
|
-
"""Load a default prompt from a file.
|
31
|
-
|
32
|
-
Arguments:
|
33
|
-
path: The path to the file.
|
34
|
-
language: The language of the prompt.
|
35
|
-
pydantic_model: The Pydantic model to use for parsing the output.
|
36
|
-
|
37
|
-
Returns:
|
38
|
-
The prompt text.
|
39
|
-
"""
|
40
|
-
if not path.exists():
|
41
|
-
raise FileNotFoundError(f"File not found: {path}")
|
42
|
-
prompt = PromptTemplate.from_template(
|
43
|
-
path.read_text(),
|
44
|
-
template_format="f-string",
|
45
|
-
partial_variables={
|
46
|
-
"language": language,
|
47
|
-
"format_instructions": parser.get_format_instructions(),
|
48
|
-
},
|
49
|
-
)
|
50
|
-
return prompt
|
51
|
-
|
52
|
-
|
53
|
-
def evaluate(
|
54
|
-
target: str,
|
55
|
-
language: str,
|
56
|
-
model: str,
|
57
|
-
prompt_path: Path,
|
58
|
-
reference: str | None = None,
|
59
|
-
):
|
60
|
-
"""Calculate the LLM self evaluation score.
|
61
|
-
|
62
|
-
Arguments:
|
63
|
-
target: The target text.
|
64
|
-
language: The language that the target code is written in.
|
65
|
-
prompt_path: The filepath of the prompt text
|
66
|
-
reference: The reference text.
|
67
|
-
|
68
|
-
Returns:
|
69
|
-
The LLM Evaluation score.
|
70
|
-
"""
|
71
|
-
parser = JsonOutputParser(pydantic_object=LLMMetricOutput)
|
72
|
-
prompt = load_prompt(prompt_path, language, parser)
|
73
|
-
chain = prompt | model | parser
|
74
|
-
try:
|
75
|
-
output = (
|
76
|
-
chain.invoke(dict(target=target, reference=reference))
|
77
|
-
if reference
|
78
|
-
else chain.invoke(dict(target=target))
|
79
|
-
)
|
80
|
-
return output["value"]
|
81
|
-
except OutputParserException:
|
82
|
-
return False
|
83
|
-
|
84
|
-
|
85
|
-
@metric(use_reference=False, name="llm", help="LLM self-evaluation on a target file")
|
86
|
-
def llm_evaluate_option(
|
87
|
-
target: str,
|
88
|
-
metric: Annotated[
|
89
|
-
str,
|
90
|
-
typer.Option(
|
91
|
-
"--metric",
|
92
|
-
"-m",
|
93
|
-
help=("The pre-defined metric to use for evaluation."),
|
94
|
-
click_type=click.Choice(
|
95
|
-
[
|
96
|
-
"quality",
|
97
|
-
"clarity",
|
98
|
-
"faithfulness",
|
99
|
-
"completeness",
|
100
|
-
"hallucination",
|
101
|
-
"readability",
|
102
|
-
"usefulness",
|
103
|
-
]
|
104
|
-
),
|
105
|
-
),
|
106
|
-
] = "quality",
|
107
|
-
prompt: Annotated[
|
108
|
-
str,
|
109
|
-
None,
|
110
|
-
typer.Option(
|
111
|
-
"--prompt",
|
112
|
-
"-P",
|
113
|
-
help=("A custom prompt in a .txt file to use for evaluation."),
|
114
|
-
),
|
115
|
-
] = None,
|
116
|
-
num_eval: Annotated[
|
117
|
-
int,
|
118
|
-
typer.Option(
|
119
|
-
"-n",
|
120
|
-
"--num-eval",
|
121
|
-
help="Number of times to run the evaluation",
|
122
|
-
),
|
123
|
-
] = 1,
|
124
|
-
**kwargs,
|
125
|
-
) -> Any:
|
126
|
-
"""CLI option to calculate the LLM self evaluation score.
|
127
|
-
|
128
|
-
Arguments:
|
129
|
-
target: The target text.
|
130
|
-
reference: The reference text.
|
131
|
-
metric: The pre-defined metric to use for evaluation.
|
132
|
-
prompt: The prompt text.
|
133
|
-
|
134
|
-
Returns:
|
135
|
-
The LLM Evaluation score.
|
136
|
-
"""
|
137
|
-
prompt_path: Path = (
|
138
|
-
Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
|
139
|
-
)
|
140
|
-
if num_eval == 1:
|
141
|
-
return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
|
142
|
-
else:
|
143
|
-
return [
|
144
|
-
evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
|
145
|
-
for _ in range(num_eval)
|
146
|
-
]
|
147
|
-
|
148
|
-
|
149
|
-
@metric(name="llm-ref", help="LLM self-evaluation on a target file and a reference file")
|
150
|
-
def llm_evaluate_ref_option(
|
151
|
-
target: str,
|
152
|
-
reference: str,
|
153
|
-
metric: Annotated[
|
154
|
-
str,
|
155
|
-
typer.Option(
|
156
|
-
"--metric",
|
157
|
-
"-m",
|
158
|
-
help=("The pre-defined metric to use for evaluation."),
|
159
|
-
click_type=click.Choice(["faithfulness"]),
|
160
|
-
),
|
161
|
-
] = "faithfulness",
|
162
|
-
prompt: Annotated[
|
163
|
-
str,
|
164
|
-
None,
|
165
|
-
typer.Option(
|
166
|
-
"--prompt",
|
167
|
-
"-P",
|
168
|
-
help=("A custom prompt in a .txt file to use for evaluation."),
|
169
|
-
),
|
170
|
-
] = None,
|
171
|
-
num_eval: Annotated[
|
172
|
-
int,
|
173
|
-
typer.Option(
|
174
|
-
"-n",
|
175
|
-
"--num-eval",
|
176
|
-
help="Number of times to run evaluation for pair",
|
177
|
-
),
|
178
|
-
] = 1,
|
179
|
-
**kwargs,
|
180
|
-
) -> Any:
|
181
|
-
"""CLI option to calculate the LLM self evaluation score, for evaluations which
|
182
|
-
require a reference file (e.g. faithfulness)
|
183
|
-
|
184
|
-
Arguments:
|
185
|
-
target: The target text.
|
186
|
-
reference: The reference text.
|
187
|
-
metric: The pre-defined metric to use for evaluation.
|
188
|
-
prompt: The prompt text.
|
189
|
-
|
190
|
-
Returns:
|
191
|
-
The LLM Evaluation score.
|
192
|
-
"""
|
193
|
-
prompt_path: Path = (
|
194
|
-
Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
|
195
|
-
)
|
196
|
-
if num_eval == 1:
|
197
|
-
return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
|
198
|
-
else:
|
199
|
-
return [
|
200
|
-
evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
|
201
|
-
for _ in range(num_eval)
|
202
|
-
]
|
File without changes
|
File without changes
|