janus-llm 2.0.2__py3-none-any.whl → 3.0.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. janus/__init__.py +2 -2
  2. janus/__main__.py +1 -1
  3. janus/_tests/test_cli.py +1 -2
  4. janus/cli.py +43 -51
  5. janus/converter/__init__.py +6 -0
  6. janus/converter/_tests/__init__.py +0 -0
  7. janus/{_tests → converter/_tests}/test_translate.py +11 -22
  8. janus/converter/converter.py +614 -0
  9. janus/converter/diagram.py +124 -0
  10. janus/converter/document.py +131 -0
  11. janus/converter/evaluate.py +15 -0
  12. janus/converter/requirements.py +50 -0
  13. janus/converter/translate.py +108 -0
  14. janus/embedding/_tests/test_collections.py +2 -2
  15. janus/language/_tests/test_splitter.py +1 -1
  16. janus/language/alc/__init__.py +1 -0
  17. janus/language/alc/_tests/__init__.py +0 -0
  18. janus/language/alc/_tests/test_alc.py +28 -0
  19. janus/language/alc/alc.py +87 -0
  20. janus/language/block.py +4 -2
  21. janus/language/combine.py +0 -1
  22. janus/language/mumps/mumps.py +2 -3
  23. janus/language/naive/__init__.py +1 -1
  24. janus/language/naive/basic_splitter.py +4 -4
  25. janus/language/naive/chunk_splitter.py +4 -4
  26. janus/language/naive/registry.py +1 -1
  27. janus/language/naive/simple_ast.py +23 -12
  28. janus/language/naive/tag_splitter.py +4 -4
  29. janus/language/splitter.py +10 -4
  30. janus/language/treesitter/treesitter.py +26 -8
  31. janus/llm/model_callbacks.py +34 -37
  32. janus/llm/models_info.py +16 -3
  33. janus/metrics/_tests/test_llm.py +2 -3
  34. janus/metrics/_tests/test_rouge_score.py +1 -1
  35. janus/metrics/_tests/test_similarity_score.py +1 -1
  36. janus/metrics/complexity_metrics.py +3 -4
  37. janus/metrics/metric.py +3 -4
  38. janus/metrics/reading.py +27 -5
  39. janus/prompts/prompt.py +67 -7
  40. janus/utils/enums.py +6 -5
  41. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/METADATA +1 -1
  42. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/RECORD +45 -35
  43. janus/converter.py +0 -158
  44. janus/translate.py +0 -981
  45. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/LICENSE +0 -0
  46. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/WHEEL +0 -0
  47. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/entry_points.txt +0 -0
@@ -1,14 +1,21 @@
1
- janus/__init__.py,sha256=GWW38p6MV8wH_SpAjgMKsoncBmX5J-7qBjCjgyoO8TY,341
2
- janus/__main__.py,sha256=Qd-f8z2Q2vpiEP2x6PBFsJrpACWDVxFKQk820MhFmHo,59
1
+ janus/__init__.py,sha256=LVDmiK9hI9u2qwCVVKQtBo3fR1FomBig_DphrdUtr3E,351
2
+ janus/__main__.py,sha256=lEkpNtLVPtFo8ySDZeXJ_NXDHb0GVdZFPWB4gD4RPS8,64
3
3
  janus/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  janus/_tests/conftest.py,sha256=V7uW-oq3YbFiRPvrq15YoVVrA1n_83pjgiyTZ-IUGW8,963
5
- janus/_tests/test_cli.py,sha256=oP-WOM-ai4jZLDGqjLzI6kCtfXWpoeUR8TynP5p6cVg,4254
6
- janus/_tests/test_translate.py,sha256=71oRTTfdSVFOBvUhuOavgl3TuimTz1K6eG-04pUGpfE,16439
7
- janus/cli.py,sha256=X_7HXQGeQEYUkIF3AHbbc04lUEUjKegZ7tUFwmKjf10,29549
8
- janus/converter.py,sha256=ge7nJA1DlOrHW_uB9P0dguc48Au8g7bqCERNqMIqfxs,5941
5
+ janus/_tests/test_cli.py,sha256=mi7wAWV07ZFli5nQdExRGIGA3AMFD9s39-HcmDV4B6Y,4232
6
+ janus/cli.py,sha256=-aeg8R6CobK2EG_BPoZgBy_x1d6G9gp-KKKhnLMepo4,29541
7
+ janus/converter/__init__.py,sha256=kzVmWOPXRDayqqBZ8ZDaFQzA_q8PEdv407dc-DefPxY,255
8
+ janus/converter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ janus/converter/_tests/test_translate.py,sha256=eiLbmouokZrAeAYmdoJgnlx5-k4QiO6i0N6e6ZvZsvM,15885
10
+ janus/converter/converter.py,sha256=Bq07_9N_3Dv9NBqVACvb7LC2HxdQmfVZ1b0BlWrxjgo,23521
11
+ janus/converter/diagram.py,sha256=v-3ZZ4t1q74lDOjF2N6NRPkC3IK-sjLDn5_VChZTEGA,4608
12
+ janus/converter/document.py,sha256=hsW512veNjFWbdl5WriuUdNmMEqZy8ktRvqn9rRmA6E,4566
13
+ janus/converter/evaluate.py,sha256=APWQUY3gjAXqkJkPzvj0UA4wPK3Cv9QSJLM-YK9t-ng,476
14
+ janus/converter/requirements.py,sha256=orOVFymFSqSVyyII1DeDqeo3pTZLxw6dJ-JKBPigR0A,1751
15
+ janus/converter/translate.py,sha256=kMlGUiBYGQBXSxwX5in3CUyUifPM95wynCaRMxSDxMw,4238
9
16
  janus/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
17
  janus/embedding/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- janus/embedding/_tests/test_collections.py,sha256=llg-JSuRRFhKkHFiWWSHEWV3iaT6Lwue0lp2tEml9io,2668
18
+ janus/embedding/_tests/test_collections.py,sha256=eT0cYv-qmPrHJRjDZqWPFTkqVzFDRoPrRKR__FPiz58,2651
12
19
  janus/embedding/_tests/test_database.py,sha256=uqI2Jgj8DEIlciqiwiZx_n0osjcspIPrHOSSN1NRZSk,1019
13
20
  janus/embedding/_tests/test_vectorize.py,sha256=NnJLHBwgMVycAProRJxuLVSByxrpJ35eaZCFca52gNY,1964
14
21
  janus/embedding/collections.py,sha256=ZE8QGYQ82DCLqhV0m1y7PiqpuHjEfxHPcS5SCKU0LAw,5411
@@ -18,35 +25,39 @@ janus/embedding/vectorize.py,sha256=ap3e6ZMai8U3M5vdpLc_st4Sw31xyqoaqEno0IJlVOU,
18
25
  janus/language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
26
  janus/language/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
27
  janus/language/_tests/test_combine.py,sha256=ydCYNbTxvaxT-5axiEBzPQLn6s4arSyZ5Tx2SYKLpJY,1830
21
- janus/language/_tests/test_splitter.py,sha256=Hqexa39LLEXlK3ZUw7Zot4PUIACvye2vkq0Jaox0T10,373
28
+ janus/language/_tests/test_splitter.py,sha256=VK48eqp5PYJfjdhD_x7IkeAjbF1KC3AyNnICfK8XnUQ,360
29
+ janus/language/alc/__init__.py,sha256=j7vOMGhT1Vri6p8dsjSaY-fkO5uFn0sJ0nrNGGvcizM,42
30
+ janus/language/alc/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ janus/language/alc/_tests/test_alc.py,sha256=DttXpouP9Vkdlf23_a0qFalKdGtadGv6oXTsmN1pk8Q,994
32
+ janus/language/alc/alc.py,sha256=n8KVHTb6FFILw50N8UM3gfT60gLVvkTjk37easwluWs,3061
22
33
  janus/language/binary/__init__.py,sha256=AlNAe12ZA366kcGSrQ1FJyOdbwxFqGBFkYR2K6yL818,51
23
34
  janus/language/binary/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
35
  janus/language/binary/_tests/test_binary.py,sha256=a-8RSfKA23UrJC9c1xPQK792XZCz8npCHI7isN2dAP8,1727
25
36
  janus/language/binary/binary.py,sha256=CS1RAieN8klSsCeXQEFYKUWioatUX-sOPXKQr5S6NzE,6534
26
37
  janus/language/binary/reveng/decompile_script.py,sha256=veW51oJzuO-4UD3Er062jXZ_FYtTFo9OCkl82Z2xr6A,2182
27
- janus/language/block.py,sha256=4f8e3YYSS2p-0fXjjl2erbbXDOHcBxiLzDHALKlPTg4,9188
28
- janus/language/combine.py,sha256=hSEc1dHLcOELks-ZGsRKHOgYBXAronKus6BTmb1u42k,2940
38
+ janus/language/block.py,sha256=57hfOY-KSVMioKhkCvfDtovQt4h8lCg9cJbRF7ddV1s,9280
39
+ janus/language/combine.py,sha256=e7j8zQO_D3_LElaVCsGgtnzia7aFFK56m-mhArQBlR0,2908
29
40
  janus/language/file.py,sha256=X2MYcAMlCABK77uhMdI_J2foXLrqEdinapYRfLPyKB8,563
30
41
  janus/language/mumps/__init__.py,sha256=-Ou_wJ-JgHezfp1dub2_qCYNiK9wO-zo2MlqxM9qiwE,48
31
42
  janus/language/mumps/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
43
  janus/language/mumps/_tests/test_mumps.py,sha256=6l7q14lPnKf231iWwMdRbf-dg9QuHa26YMS7-K7yC4A,1001
33
- janus/language/mumps/mumps.py,sha256=J8ewuLDh7y9GeyCcGNYqCGJ9HOwrEWypc2HNxClZ8is,7382
44
+ janus/language/mumps/mumps.py,sha256=MkF_TZB1SOIj3JQfGKYow1Hh2Bja0EglUlpd4aAY5Iw,7351
34
45
  janus/language/mumps/patterns.py,sha256=FW5T6Nt5kBO2UKgSL1KLVDbYRgMaJAzDvEmvBkxHppA,2310
35
- janus/language/naive/__init__.py,sha256=6P5rDAZtoHTObkFbZkiEdd-PVRA_9VTQogUjwvRMKK8,198
36
- janus/language/naive/basic_splitter.py,sha256=RM9pJK2YkHfb6_EFEV-dh_rLqkjS6v0cn3ASPf8A6Fg,459
37
- janus/language/naive/chunk_splitter.py,sha256=ebRSbaJhDW-Hyr5__ukbdmAl6kQ1WWFqrq_SfCgHo6k,772
38
- janus/language/naive/registry.py,sha256=8YQX1q0IdAm7t69-oC_00I-vfkdRnHuX-OD3KEjEIuU,294
39
- janus/language/naive/simple_ast.py,sha256=gix_fh864sHZ5KeXoOZIVdKdQeCN_4Qwq4Ox-haZ6sY,593
40
- janus/language/naive/tag_splitter.py,sha256=IXWMn9tBVUGAtzvQi89GhoZ6g7fPXk5MzO0kMCr2mb0,2045
46
+ janus/language/naive/__init__.py,sha256=gsdC543qsIX8y_RxblCBIgyW0tfucljFms6v2WTrEz0,178
47
+ janus/language/naive/basic_splitter.py,sha256=NFW3TvMFQwEmcj5r4jvQXBJCzgNcSZI-3Arjb191gAo,407
48
+ janus/language/naive/chunk_splitter.py,sha256=g1nqbhvaOZ31SjO-smIwAg6lHGTy2rPOOnQ-m6fIKAA,713
49
+ janus/language/naive/registry.py,sha256=CDUkMIgscdPBV_qu49u9TGnOIgr9mRasinPRwViTWz8,281
50
+ janus/language/naive/simple_ast.py,sha256=boX_pJ8x52_MxiM6hJ-0oa6MR75Fu4pyebBEtYJJZUc,907
51
+ janus/language/naive/tag_splitter.py,sha256=6DHBJdM3IllcVV-MrAyj8KPg5zXTiRdeD42CYrmEFHk,1986
41
52
  janus/language/node.py,sha256=-ymv--oILEYLVO2KSOrzOlzL2cZHNQpQJYwE1cKA-pY,200
42
- janus/language/splitter.py,sha256=Ep8RxWrnuih3MAcdkkbtAsSLrPmyQcjnk0IzbRC-460,16741
53
+ janus/language/splitter.py,sha256=4XAe0hXka7njS30UHGCngJzDgHxn3lygUjikSHuV7Xo,16924
43
54
  janus/language/treesitter/__init__.py,sha256=mUliw7ZJLZ8NkJKyUQMSoUV82hYXE0HvLHrEdGPJF4Q,43
44
55
  janus/language/treesitter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
56
  janus/language/treesitter/_tests/test_treesitter.py,sha256=nsavUV0aI6cpT9FkQve58eTTehLyQG6qJJBGlNa_bIw,2170
46
- janus/language/treesitter/treesitter.py,sha256=9hbP7eBuSEKSZm6OD4C9q2tbjzrEidaCAKw74aO4lEM,6855
57
+ janus/language/treesitter/treesitter.py,sha256=UiV4OuWTt6IwMohHSw4FHsVNA_zxr9lNk4_Du09APdo,7509
47
58
  janus/llm/__init__.py,sha256=8Pzn3Jdx867PzDc4xmwm8wvJDGzWSIhpN0NCEYFe0LQ,36
48
- janus/llm/model_callbacks.py,sha256=zMCbMgniKrzKf-sU9SxOcfoOvc3xz7y0VxIxfdlS5tA,6766
49
- janus/llm/models_info.py,sha256=jNTp7mg7MVSS-Anp9Z-wMTz8odiE-1xXeyi8ngpJi1E,7151
59
+ janus/llm/model_callbacks.py,sha256=h_xlBAHRx-gxQBBjVKRpGXxdxYf6d9L6kBoXjbEAEdI,7106
60
+ janus/llm/models_info.py,sha256=B9Dn5mHc43OeZe5mHFj5wuhO194XHCTwShAa2ybnPyY,7688
50
61
  janus/metrics/__init__.py,sha256=AsxtZJUzZiXJPr2ehPPltuYP-ddechjg6X85WZUO7mA,241
51
62
  janus/metrics/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
63
  janus/metrics/_tests/reference.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE8,13
@@ -54,19 +65,19 @@ janus/metrics/_tests/target.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE
54
65
  janus/metrics/_tests/test_bleu.py,sha256=TcSnNGpMh00Nkkk1zq5wDfdCANMUq9eXscU_hcBRU8A,1640
55
66
  janus/metrics/_tests/test_chrf.py,sha256=O4v1Cj513H8NYffJILpSI7CuR_dnm7F8CeB3C7sZYr0,2202
56
67
  janus/metrics/_tests/test_file_pairing.py,sha256=A4Qy6JIesFXUcaig45Ze6LiViuHQS7MFSQzDHQP3j9w,1880
57
- janus/metrics/_tests/test_llm.py,sha256=IYsLwX5zC2WcaaPeSlHuQVmU2sB55-dsOXnmQPhcKps,3007
68
+ janus/metrics/_tests/test_llm.py,sha256=dGXrdd79v-ix_560t6Q8RJEx-6mgZ-pkzJgm-O2ZBwA,2998
58
69
  janus/metrics/_tests/test_reading.py,sha256=NDLFyjmOpM5gWf1LLTjGIw3aUR8Qf22zTt9hwe7NABs,840
59
- janus/metrics/_tests/test_rouge_score.py,sha256=dnP99nry-U5wyE-CiC0eQwm78IyScnmGQH3BeIEdmLY,2032
60
- janus/metrics/_tests/test_similarity_score.py,sha256=tdzH_8hYb2h7fKxpd_a75di-GPnU_frZ0zn2aeYzkso,811
70
+ janus/metrics/_tests/test_rouge_score.py,sha256=rcHmrpy55cW507PnTnGQnp9Tsn5rk7JEyXmusY7la3Q,2020
71
+ janus/metrics/_tests/test_similarity_score.py,sha256=jc3r0lWW5Iqm6AMKc36ewz5rboKwVw29fliBHClkzIg,799
61
72
  janus/metrics/_tests/test_treesitter_metrics.py,sha256=tqpAg9LY811gfQ3n2ypRqBJesAFQodMf6Gz7dvOsqp4,4337
62
73
  janus/metrics/bleu.py,sha256=eRoHIQulPp5mezJzHCNkwUB_89tAj4PqV2pF9eV9HfI,1746
63
74
  janus/metrics/chrf.py,sha256=zNGWZ40CPMgj8rctnmwkbf25_PvSOLPbOjv-iN2cGXM,1472
64
75
  janus/metrics/cli.py,sha256=Duuw2RF47Z-t1pal0cg3L_-N_91rx29krirqtIwjYLY,157
65
- janus/metrics/complexity_metrics.py,sha256=kJh_TyZttMP716MXGfl-WbaS4beR_DaQWGYVg6MurSU,6573
76
+ janus/metrics/complexity_metrics.py,sha256=1Z9n0o_CrILqayk40wRkjR1f7yvHIsJG38DxAbqj614,6560
66
77
  janus/metrics/file_pairing.py,sha256=WNHRV1D8GOJMq8Pla5SPkTDAT7yVaS4-UU0XIGKvEVs,3729
67
78
  janus/metrics/llm_metrics.py,sha256=3677S6GYcoVcokpmAN-fwvNu-lYWAKd7M5mebiE6RZc,5687
68
- janus/metrics/metric.py,sha256=QRzLCkHY2g4pWDIiVLNaxNiEMF4gXw_eUrjecH5CdwA,16987
69
- janus/metrics/reading.py,sha256=KYuWjKnk0ALRU5S7mCNNZtaNgK02l0fdIGsaNvxLMO4,1690
79
+ janus/metrics/metric.py,sha256=Lgdtq87oJ-kWC_6jdPQ6-d1MqoeTnhkRszo6IZJV6c0,16974
80
+ janus/metrics/reading.py,sha256=srLb2MO-vZL5ccRjaHz-dA4MwAvXVNyIKnOrvJXg77E,2244
70
81
  janus/metrics/rouge_score.py,sha256=HfUJwUWI-yq5pOjML2ee4QTOMl0NQahnqEY2Mt8Dtnw,2865
71
82
  janus/metrics/similarity.py,sha256=9pjWWpLKCsk0QfFfSgQNdPXiisqi7WJYOOHaiT8S0iY,1613
72
83
  janus/metrics/splitting.py,sha256=610ScHRvALwdkqA6YyGI-tr3a18_cUofldBxGYX0SwE,968
@@ -78,17 +89,16 @@ janus/parsers/doc_parser.py,sha256=X8eCb1QXbL6sVWLEFGjsPyxrpJ9XnOPg7G4KZSo9A9E,5
78
89
  janus/parsers/eval_parser.py,sha256=HB5-zY_Jpmkj6FDbuNCCVCRxwmzhViSAjPKbyyC0Ebc,2723
79
90
  janus/parsers/reqs_parser.py,sha256=MFBvtR3otpyPZlkZxu0dVH1YeEJhvhNzhaGKGHaQVHA,2359
80
91
  janus/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
- janus/prompts/prompt.py,sha256=GMQ9EqwtIhB_x8MgfoeTeb4OkLaySYdGd3wVbpDMZXA,8911
82
- janus/translate.py,sha256=bsQ1YvjCPrVrL3y-rAA2PrCv2-x3ObCF7a6LuPWQPuE,38747
92
+ janus/prompts/prompt.py,sha256=vd7UbitF0VFCi21RsggDebD51xcuyls_lQLGKkphfI8,10578
83
93
  janus/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
94
  janus/utils/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
95
  janus/utils/_tests/test_logger.py,sha256=4jZFm8LX828Dt9lOjiFHZIPbxYy_hHaswyrMPkscgdM,2199
86
96
  janus/utils/_tests/test_progress.py,sha256=Yh5NDNq-24n2nhHHbJm39pENAH70PYnh9ymwdcn0_UU,481
87
- janus/utils/enums.py,sha256=SlZKHojLPYOSjuekQGirSHem5Etcgy57txCtVCej2Ag,27533
97
+ janus/utils/enums.py,sha256=AoilbdiYyMvY2Mp0AM4xlbLSELfut2XMwhIM1S_msP4,27610
88
98
  janus/utils/logger.py,sha256=KZeuaMAnlSZCsj4yL0P6N-JzZwpxXygzACWfdZFeuek,2337
89
99
  janus/utils/progress.py,sha256=pKcCzO9JOU9fSD7qTmLWcqY5smc8mujqQMXoPgqNysE,1458
90
- janus_llm-2.0.2.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
91
- janus_llm-2.0.2.dist-info/METADATA,sha256=sUtUMkjBZEmov6apazRphlqnBKG02dLkzWmq6-qIrqc,4184
92
- janus_llm-2.0.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
93
- janus_llm-2.0.2.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
94
- janus_llm-2.0.2.dist-info/RECORD,,
100
+ janus_llm-3.0.0.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
101
+ janus_llm-3.0.0.dist-info/METADATA,sha256=arXVqb1tVW76Bpjj1b1hq-rWsvu25DHg9rNv3ZaFZW0,4184
102
+ janus_llm-3.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
103
+ janus_llm-3.0.0.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
104
+ janus_llm-3.0.0.dist-info/RECORD,,
janus/converter.py DELETED
@@ -1,158 +0,0 @@
1
- import functools
2
- from typing import Any
3
-
4
- from langchain.schema.language_model import BaseLanguageModel
5
-
6
- from .language.binary import BinarySplitter
7
- from .language.mumps import MumpsSplitter
8
- from .language.splitter import Splitter
9
- from .language.treesitter import TreeSitterSplitter
10
- from .utils.enums import CUSTOM_SPLITTERS, LANGUAGES
11
- from .utils.logger import create_logger
12
-
13
- log = create_logger(__name__)
14
-
15
-
16
def run_if_changed(*tracked_vars):
    """Build a decorator that skips a method call when nothing relevant changed.

    The decorated method only runs when at least one of the names in
    `tracked_vars` is present in the instance's `_changed_attrs` set. The instance
    must provide that set itself, e.g. by overriding `__setattr__` to record the
    names of updated attributes in `_changed_attrs`.

    Arguments:
        *tracked_vars: Names of the instance attributes the decorated method
            depends on.

    Returns:
        A decorator. The decorated method returns whatever the original method
        returns when it runs, and `None` when the call is skipped.
    """

    def wrapper(func):
        @functools.wraps(func)
        def wrapped(self, *args, **kwargs):
            # No overlap between the tracked names and the changed ones: skip
            if not self._changed_attrs.intersection(tracked_vars):
                return None
            # Hand the result back to the caller rather than discarding it
            return func(self, *args, **kwargs)

        return wrapped

    return wrapper
33
-
34
-
35
class Converter:
    """Parent class that converts code into something else.

    Children will determine what the code gets converted into. Whether that's
    translated into another language, into pseudocode, requirements, documentation,
    etc., or converted into embeddings.

    Attribute assignments are recorded in `_changed_attrs` (see `__setattr__`) so
    that methods decorated with `run_if_changed` can skip work when none of the
    attributes they depend on have been updated.
    """

    def __init__(
        self,
        source_language: str = "fortran",
        max_tokens: None | int = None,
        protected_node_types: set[str] | list[str] | tuple[str] = (),
        prune_node_types: set[str] | list[str] | tuple[str] = (),
    ) -> None:
        """Initialize a Converter instance.

        Arguments:
            source_language: The source programming language. Must be a key of
                `janus.utils.enums.LANGUAGES` (case-insensitive).
            max_tokens: Passed to the splitter as its `max_tokens` argument.
            protected_node_types: Node types that aren't to be merged.
            prune_node_types: Node types which should be pruned.

        Raises:
            ValueError: If `source_language` is not a known language.
        """
        # Must be created first: once it exists, every later assignment is tracked
        self._changed_attrs: set = set()

        self._source_language: None | str
        self._source_glob: None | str
        self._protected_node_types: tuple[str, ...] = ()
        self._prune_node_types: tuple[str, ...] = ()
        self._splitter: None | Splitter
        self._llm: None | BaseLanguageModel = None
        self._max_tokens: None | int = max_tokens

        self.set_source_language(source_language)
        self.set_protected_node_types(protected_node_types)
        self.set_prune_node_types(prune_node_types)

        # _load_parameters() is deliberately not called here; child classes must
        # call it themselves. Should we enforce that somehow?

    def __setattr__(self, key: Any, value: Any) -> None:
        """Set an attribute and record its name in `_changed_attrs` if it changed.

        A name is recorded when the attribute did not exist before or when its
        previous value compares unequal to `value`.
        """
        if hasattr(self, "_changed_attrs"):
            if not hasattr(self, key) or getattr(self, key) != value:
                self._changed_attrs.add(key)
        # Avoid infinite recursion
        elif key != "_changed_attrs":
            # The tracking set doesn't exist yet, so create it. The attribute
            # being assigned right now is not itself recorded as changed.
            self._changed_attrs = set()
        super().__setattr__(key, value)

    def _load_parameters(self) -> None:
        """Rebuild the objects derived from the attributes, then reset tracking."""
        self._load_splitter()
        self._changed_attrs.clear()

    def set_source_language(self, source_language: str) -> None:
        """Validate and set the source language.

        The affected objects will not be updated until _load_parameters() is called.

        Arguments:
            source_language: The source programming language.

        Raises:
            ValueError: If the lower-cased `source_language` is not in `LANGUAGES`.
        """
        source_language = source_language.lower()
        if source_language not in LANGUAGES:
            raise ValueError(
                f"Invalid source language: {source_language}. "
                "Valid source languages are found in `janus.utils.enums.LANGUAGES`."
            )

        # Recursive glob built from the language's configured file suffix
        self._source_glob = f"**/*.{LANGUAGES[source_language]['suffix']}"
        self._source_language = source_language

    def set_protected_node_types(
        self, protected_node_types: set[str] | list[str] | tuple[str]
    ) -> None:
        """Set the protected (non-mergeable) node types. This will often be structures
        like functions, classes, or modules which you might want to keep separate

        The affected objects will not be updated until _load_parameters() is called.

        Arguments:
            protected_node_types: A set of node types that aren't to be merged
        """
        # Deduplicate; a falsy argument (e.g. None) is treated as "no types"
        self._protected_node_types = tuple(set(protected_node_types or []))

    def set_prune_node_types(
        self, prune_node_types: set[str] | list[str] | tuple[str]
    ) -> None:
        """Set the node types to prune. This will often be structures
        like comments or whitespace which you might want to keep out of the LLM

        The affected objects will not be updated until _load_parameters() is called.

        Arguments:
            prune_node_types: A set of node types which should be pruned
        """
        # Deduplicate; a falsy argument (e.g. None) is treated as "no types"
        self._prune_node_types = tuple(set(prune_node_types or []))

    @run_if_changed(
        "_source_language",
        "_max_tokens",
        "_llm",
        "_protected_node_types",
        "_prune_node_types",
    )
    def _load_splitter(self) -> None:
        """Load the splitter according to this instance's attributes.

        If the relevant fields have not been changed since the last time this method was
        called, nothing happens.

        Raises:
            ValueError: If the source language is listed in `CUSTOM_SPLITTERS` but no
                splitter class is mapped to it here.
        """
        kwargs = dict(
            max_tokens=self._max_tokens,
            model=self._llm,
            protected_node_types=self._protected_node_types,
            prune_node_types=self._prune_node_types,
        )
        if self._source_language in CUSTOM_SPLITTERS:
            custom_splitters = {"mumps": MumpsSplitter, "binary": BinarySplitter}
            splitter_class = custom_splitters.get(self._source_language)
            if splitter_class is None:
                # Fail loudly instead of silently keeping a missing or stale
                # splitter from a previously configured language.
                raise ValueError(
                    "No custom splitter is implemented for source language: "
                    f"{self._source_language}."
                )
            self._splitter = splitter_class(**kwargs)
        else:
            self._splitter = TreeSitterSplitter(language=self._source_language, **kwargs)