janus-llm 2.0.2__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. janus/__init__.py +2 -2
  2. janus/__main__.py +1 -1
  3. janus/_tests/test_cli.py +1 -2
  4. janus/cli.py +43 -51
  5. janus/converter/__init__.py +6 -0
  6. janus/converter/_tests/__init__.py +0 -0
  7. janus/{_tests → converter/_tests}/test_translate.py +11 -22
  8. janus/converter/converter.py +614 -0
  9. janus/converter/diagram.py +124 -0
  10. janus/converter/document.py +131 -0
  11. janus/converter/evaluate.py +15 -0
  12. janus/converter/requirements.py +50 -0
  13. janus/converter/translate.py +108 -0
  14. janus/embedding/_tests/test_collections.py +2 -2
  15. janus/language/_tests/test_splitter.py +1 -1
  16. janus/language/alc/__init__.py +1 -0
  17. janus/language/alc/_tests/__init__.py +0 -0
  18. janus/language/alc/_tests/test_alc.py +28 -0
  19. janus/language/alc/alc.py +87 -0
  20. janus/language/block.py +4 -2
  21. janus/language/combine.py +0 -1
  22. janus/language/mumps/mumps.py +2 -3
  23. janus/language/naive/__init__.py +1 -1
  24. janus/language/naive/basic_splitter.py +4 -4
  25. janus/language/naive/chunk_splitter.py +4 -4
  26. janus/language/naive/registry.py +1 -1
  27. janus/language/naive/simple_ast.py +23 -12
  28. janus/language/naive/tag_splitter.py +4 -4
  29. janus/language/splitter.py +10 -4
  30. janus/language/treesitter/treesitter.py +26 -8
  31. janus/llm/model_callbacks.py +34 -37
  32. janus/llm/models_info.py +16 -3
  33. janus/metrics/_tests/test_llm.py +2 -3
  34. janus/metrics/_tests/test_rouge_score.py +1 -1
  35. janus/metrics/_tests/test_similarity_score.py +1 -1
  36. janus/metrics/complexity_metrics.py +3 -4
  37. janus/metrics/metric.py +3 -4
  38. janus/metrics/reading.py +27 -5
  39. janus/prompts/prompt.py +67 -7
  40. janus/utils/enums.py +6 -5
  41. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/METADATA +1 -1
  42. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/RECORD +45 -35
  43. janus/converter.py +0 -158
  44. janus/translate.py +0 -981
  45. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/LICENSE +0 -0
  46. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/WHEEL +0 -0
  47. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/entry_points.txt +0 -0
@@ -1,14 +1,21 @@
1
- janus/__init__.py,sha256=GWW38p6MV8wH_SpAjgMKsoncBmX5J-7qBjCjgyoO8TY,341
2
- janus/__main__.py,sha256=Qd-f8z2Q2vpiEP2x6PBFsJrpACWDVxFKQk820MhFmHo,59
1
+ janus/__init__.py,sha256=LVDmiK9hI9u2qwCVVKQtBo3fR1FomBig_DphrdUtr3E,351
2
+ janus/__main__.py,sha256=lEkpNtLVPtFo8ySDZeXJ_NXDHb0GVdZFPWB4gD4RPS8,64
3
3
  janus/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  janus/_tests/conftest.py,sha256=V7uW-oq3YbFiRPvrq15YoVVrA1n_83pjgiyTZ-IUGW8,963
5
- janus/_tests/test_cli.py,sha256=oP-WOM-ai4jZLDGqjLzI6kCtfXWpoeUR8TynP5p6cVg,4254
6
- janus/_tests/test_translate.py,sha256=71oRTTfdSVFOBvUhuOavgl3TuimTz1K6eG-04pUGpfE,16439
7
- janus/cli.py,sha256=X_7HXQGeQEYUkIF3AHbbc04lUEUjKegZ7tUFwmKjf10,29549
8
- janus/converter.py,sha256=ge7nJA1DlOrHW_uB9P0dguc48Au8g7bqCERNqMIqfxs,5941
5
+ janus/_tests/test_cli.py,sha256=mi7wAWV07ZFli5nQdExRGIGA3AMFD9s39-HcmDV4B6Y,4232
6
+ janus/cli.py,sha256=-aeg8R6CobK2EG_BPoZgBy_x1d6G9gp-KKKhnLMepo4,29541
7
+ janus/converter/__init__.py,sha256=kzVmWOPXRDayqqBZ8ZDaFQzA_q8PEdv407dc-DefPxY,255
8
+ janus/converter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ janus/converter/_tests/test_translate.py,sha256=eiLbmouokZrAeAYmdoJgnlx5-k4QiO6i0N6e6ZvZsvM,15885
10
+ janus/converter/converter.py,sha256=Bq07_9N_3Dv9NBqVACvb7LC2HxdQmfVZ1b0BlWrxjgo,23521
11
+ janus/converter/diagram.py,sha256=v-3ZZ4t1q74lDOjF2N6NRPkC3IK-sjLDn5_VChZTEGA,4608
12
+ janus/converter/document.py,sha256=hsW512veNjFWbdl5WriuUdNmMEqZy8ktRvqn9rRmA6E,4566
13
+ janus/converter/evaluate.py,sha256=APWQUY3gjAXqkJkPzvj0UA4wPK3Cv9QSJLM-YK9t-ng,476
14
+ janus/converter/requirements.py,sha256=orOVFymFSqSVyyII1DeDqeo3pTZLxw6dJ-JKBPigR0A,1751
15
+ janus/converter/translate.py,sha256=kMlGUiBYGQBXSxwX5in3CUyUifPM95wynCaRMxSDxMw,4238
9
16
  janus/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
17
  janus/embedding/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- janus/embedding/_tests/test_collections.py,sha256=llg-JSuRRFhKkHFiWWSHEWV3iaT6Lwue0lp2tEml9io,2668
18
+ janus/embedding/_tests/test_collections.py,sha256=eT0cYv-qmPrHJRjDZqWPFTkqVzFDRoPrRKR__FPiz58,2651
12
19
  janus/embedding/_tests/test_database.py,sha256=uqI2Jgj8DEIlciqiwiZx_n0osjcspIPrHOSSN1NRZSk,1019
13
20
  janus/embedding/_tests/test_vectorize.py,sha256=NnJLHBwgMVycAProRJxuLVSByxrpJ35eaZCFca52gNY,1964
14
21
  janus/embedding/collections.py,sha256=ZE8QGYQ82DCLqhV0m1y7PiqpuHjEfxHPcS5SCKU0LAw,5411
@@ -18,35 +25,39 @@ janus/embedding/vectorize.py,sha256=ap3e6ZMai8U3M5vdpLc_st4Sw31xyqoaqEno0IJlVOU,
18
25
  janus/language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
26
  janus/language/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
27
  janus/language/_tests/test_combine.py,sha256=ydCYNbTxvaxT-5axiEBzPQLn6s4arSyZ5Tx2SYKLpJY,1830
21
- janus/language/_tests/test_splitter.py,sha256=Hqexa39LLEXlK3ZUw7Zot4PUIACvye2vkq0Jaox0T10,373
28
+ janus/language/_tests/test_splitter.py,sha256=VK48eqp5PYJfjdhD_x7IkeAjbF1KC3AyNnICfK8XnUQ,360
29
+ janus/language/alc/__init__.py,sha256=j7vOMGhT1Vri6p8dsjSaY-fkO5uFn0sJ0nrNGGvcizM,42
30
+ janus/language/alc/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ janus/language/alc/_tests/test_alc.py,sha256=DttXpouP9Vkdlf23_a0qFalKdGtadGv6oXTsmN1pk8Q,994
32
+ janus/language/alc/alc.py,sha256=n8KVHTb6FFILw50N8UM3gfT60gLVvkTjk37easwluWs,3061
22
33
  janus/language/binary/__init__.py,sha256=AlNAe12ZA366kcGSrQ1FJyOdbwxFqGBFkYR2K6yL818,51
23
34
  janus/language/binary/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
35
  janus/language/binary/_tests/test_binary.py,sha256=a-8RSfKA23UrJC9c1xPQK792XZCz8npCHI7isN2dAP8,1727
25
36
  janus/language/binary/binary.py,sha256=CS1RAieN8klSsCeXQEFYKUWioatUX-sOPXKQr5S6NzE,6534
26
37
  janus/language/binary/reveng/decompile_script.py,sha256=veW51oJzuO-4UD3Er062jXZ_FYtTFo9OCkl82Z2xr6A,2182
27
- janus/language/block.py,sha256=4f8e3YYSS2p-0fXjjl2erbbXDOHcBxiLzDHALKlPTg4,9188
28
- janus/language/combine.py,sha256=hSEc1dHLcOELks-ZGsRKHOgYBXAronKus6BTmb1u42k,2940
38
+ janus/language/block.py,sha256=57hfOY-KSVMioKhkCvfDtovQt4h8lCg9cJbRF7ddV1s,9280
39
+ janus/language/combine.py,sha256=e7j8zQO_D3_LElaVCsGgtnzia7aFFK56m-mhArQBlR0,2908
29
40
  janus/language/file.py,sha256=X2MYcAMlCABK77uhMdI_J2foXLrqEdinapYRfLPyKB8,563
30
41
  janus/language/mumps/__init__.py,sha256=-Ou_wJ-JgHezfp1dub2_qCYNiK9wO-zo2MlqxM9qiwE,48
31
42
  janus/language/mumps/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
43
  janus/language/mumps/_tests/test_mumps.py,sha256=6l7q14lPnKf231iWwMdRbf-dg9QuHa26YMS7-K7yC4A,1001
33
- janus/language/mumps/mumps.py,sha256=J8ewuLDh7y9GeyCcGNYqCGJ9HOwrEWypc2HNxClZ8is,7382
44
+ janus/language/mumps/mumps.py,sha256=MkF_TZB1SOIj3JQfGKYow1Hh2Bja0EglUlpd4aAY5Iw,7351
34
45
  janus/language/mumps/patterns.py,sha256=FW5T6Nt5kBO2UKgSL1KLVDbYRgMaJAzDvEmvBkxHppA,2310
35
- janus/language/naive/__init__.py,sha256=6P5rDAZtoHTObkFbZkiEdd-PVRA_9VTQogUjwvRMKK8,198
36
- janus/language/naive/basic_splitter.py,sha256=RM9pJK2YkHfb6_EFEV-dh_rLqkjS6v0cn3ASPf8A6Fg,459
37
- janus/language/naive/chunk_splitter.py,sha256=ebRSbaJhDW-Hyr5__ukbdmAl6kQ1WWFqrq_SfCgHo6k,772
38
- janus/language/naive/registry.py,sha256=8YQX1q0IdAm7t69-oC_00I-vfkdRnHuX-OD3KEjEIuU,294
39
- janus/language/naive/simple_ast.py,sha256=gix_fh864sHZ5KeXoOZIVdKdQeCN_4Qwq4Ox-haZ6sY,593
40
- janus/language/naive/tag_splitter.py,sha256=IXWMn9tBVUGAtzvQi89GhoZ6g7fPXk5MzO0kMCr2mb0,2045
46
+ janus/language/naive/__init__.py,sha256=gsdC543qsIX8y_RxblCBIgyW0tfucljFms6v2WTrEz0,178
47
+ janus/language/naive/basic_splitter.py,sha256=NFW3TvMFQwEmcj5r4jvQXBJCzgNcSZI-3Arjb191gAo,407
48
+ janus/language/naive/chunk_splitter.py,sha256=g1nqbhvaOZ31SjO-smIwAg6lHGTy2rPOOnQ-m6fIKAA,713
49
+ janus/language/naive/registry.py,sha256=CDUkMIgscdPBV_qu49u9TGnOIgr9mRasinPRwViTWz8,281
50
+ janus/language/naive/simple_ast.py,sha256=boX_pJ8x52_MxiM6hJ-0oa6MR75Fu4pyebBEtYJJZUc,907
51
+ janus/language/naive/tag_splitter.py,sha256=6DHBJdM3IllcVV-MrAyj8KPg5zXTiRdeD42CYrmEFHk,1986
41
52
  janus/language/node.py,sha256=-ymv--oILEYLVO2KSOrzOlzL2cZHNQpQJYwE1cKA-pY,200
42
- janus/language/splitter.py,sha256=Ep8RxWrnuih3MAcdkkbtAsSLrPmyQcjnk0IzbRC-460,16741
53
+ janus/language/splitter.py,sha256=4XAe0hXka7njS30UHGCngJzDgHxn3lygUjikSHuV7Xo,16924
43
54
  janus/language/treesitter/__init__.py,sha256=mUliw7ZJLZ8NkJKyUQMSoUV82hYXE0HvLHrEdGPJF4Q,43
44
55
  janus/language/treesitter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
56
  janus/language/treesitter/_tests/test_treesitter.py,sha256=nsavUV0aI6cpT9FkQve58eTTehLyQG6qJJBGlNa_bIw,2170
46
- janus/language/treesitter/treesitter.py,sha256=9hbP7eBuSEKSZm6OD4C9q2tbjzrEidaCAKw74aO4lEM,6855
57
+ janus/language/treesitter/treesitter.py,sha256=UiV4OuWTt6IwMohHSw4FHsVNA_zxr9lNk4_Du09APdo,7509
47
58
  janus/llm/__init__.py,sha256=8Pzn3Jdx867PzDc4xmwm8wvJDGzWSIhpN0NCEYFe0LQ,36
48
- janus/llm/model_callbacks.py,sha256=zMCbMgniKrzKf-sU9SxOcfoOvc3xz7y0VxIxfdlS5tA,6766
49
- janus/llm/models_info.py,sha256=jNTp7mg7MVSS-Anp9Z-wMTz8odiE-1xXeyi8ngpJi1E,7151
59
+ janus/llm/model_callbacks.py,sha256=h_xlBAHRx-gxQBBjVKRpGXxdxYf6d9L6kBoXjbEAEdI,7106
60
+ janus/llm/models_info.py,sha256=B9Dn5mHc43OeZe5mHFj5wuhO194XHCTwShAa2ybnPyY,7688
50
61
  janus/metrics/__init__.py,sha256=AsxtZJUzZiXJPr2ehPPltuYP-ddechjg6X85WZUO7mA,241
51
62
  janus/metrics/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
63
  janus/metrics/_tests/reference.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE8,13
@@ -54,19 +65,19 @@ janus/metrics/_tests/target.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE
54
65
  janus/metrics/_tests/test_bleu.py,sha256=TcSnNGpMh00Nkkk1zq5wDfdCANMUq9eXscU_hcBRU8A,1640
55
66
  janus/metrics/_tests/test_chrf.py,sha256=O4v1Cj513H8NYffJILpSI7CuR_dnm7F8CeB3C7sZYr0,2202
56
67
  janus/metrics/_tests/test_file_pairing.py,sha256=A4Qy6JIesFXUcaig45Ze6LiViuHQS7MFSQzDHQP3j9w,1880
57
- janus/metrics/_tests/test_llm.py,sha256=IYsLwX5zC2WcaaPeSlHuQVmU2sB55-dsOXnmQPhcKps,3007
68
+ janus/metrics/_tests/test_llm.py,sha256=dGXrdd79v-ix_560t6Q8RJEx-6mgZ-pkzJgm-O2ZBwA,2998
58
69
  janus/metrics/_tests/test_reading.py,sha256=NDLFyjmOpM5gWf1LLTjGIw3aUR8Qf22zTt9hwe7NABs,840
59
- janus/metrics/_tests/test_rouge_score.py,sha256=dnP99nry-U5wyE-CiC0eQwm78IyScnmGQH3BeIEdmLY,2032
60
- janus/metrics/_tests/test_similarity_score.py,sha256=tdzH_8hYb2h7fKxpd_a75di-GPnU_frZ0zn2aeYzkso,811
70
+ janus/metrics/_tests/test_rouge_score.py,sha256=rcHmrpy55cW507PnTnGQnp9Tsn5rk7JEyXmusY7la3Q,2020
71
+ janus/metrics/_tests/test_similarity_score.py,sha256=jc3r0lWW5Iqm6AMKc36ewz5rboKwVw29fliBHClkzIg,799
61
72
  janus/metrics/_tests/test_treesitter_metrics.py,sha256=tqpAg9LY811gfQ3n2ypRqBJesAFQodMf6Gz7dvOsqp4,4337
62
73
  janus/metrics/bleu.py,sha256=eRoHIQulPp5mezJzHCNkwUB_89tAj4PqV2pF9eV9HfI,1746
63
74
  janus/metrics/chrf.py,sha256=zNGWZ40CPMgj8rctnmwkbf25_PvSOLPbOjv-iN2cGXM,1472
64
75
  janus/metrics/cli.py,sha256=Duuw2RF47Z-t1pal0cg3L_-N_91rx29krirqtIwjYLY,157
65
- janus/metrics/complexity_metrics.py,sha256=kJh_TyZttMP716MXGfl-WbaS4beR_DaQWGYVg6MurSU,6573
76
+ janus/metrics/complexity_metrics.py,sha256=1Z9n0o_CrILqayk40wRkjR1f7yvHIsJG38DxAbqj614,6560
66
77
  janus/metrics/file_pairing.py,sha256=WNHRV1D8GOJMq8Pla5SPkTDAT7yVaS4-UU0XIGKvEVs,3729
67
78
  janus/metrics/llm_metrics.py,sha256=3677S6GYcoVcokpmAN-fwvNu-lYWAKd7M5mebiE6RZc,5687
68
- janus/metrics/metric.py,sha256=QRzLCkHY2g4pWDIiVLNaxNiEMF4gXw_eUrjecH5CdwA,16987
69
- janus/metrics/reading.py,sha256=KYuWjKnk0ALRU5S7mCNNZtaNgK02l0fdIGsaNvxLMO4,1690
79
+ janus/metrics/metric.py,sha256=Lgdtq87oJ-kWC_6jdPQ6-d1MqoeTnhkRszo6IZJV6c0,16974
80
+ janus/metrics/reading.py,sha256=srLb2MO-vZL5ccRjaHz-dA4MwAvXVNyIKnOrvJXg77E,2244
70
81
  janus/metrics/rouge_score.py,sha256=HfUJwUWI-yq5pOjML2ee4QTOMl0NQahnqEY2Mt8Dtnw,2865
71
82
  janus/metrics/similarity.py,sha256=9pjWWpLKCsk0QfFfSgQNdPXiisqi7WJYOOHaiT8S0iY,1613
72
83
  janus/metrics/splitting.py,sha256=610ScHRvALwdkqA6YyGI-tr3a18_cUofldBxGYX0SwE,968
@@ -78,17 +89,16 @@ janus/parsers/doc_parser.py,sha256=X8eCb1QXbL6sVWLEFGjsPyxrpJ9XnOPg7G4KZSo9A9E,5
78
89
  janus/parsers/eval_parser.py,sha256=HB5-zY_Jpmkj6FDbuNCCVCRxwmzhViSAjPKbyyC0Ebc,2723
79
90
  janus/parsers/reqs_parser.py,sha256=MFBvtR3otpyPZlkZxu0dVH1YeEJhvhNzhaGKGHaQVHA,2359
80
91
  janus/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
- janus/prompts/prompt.py,sha256=GMQ9EqwtIhB_x8MgfoeTeb4OkLaySYdGd3wVbpDMZXA,8911
82
- janus/translate.py,sha256=bsQ1YvjCPrVrL3y-rAA2PrCv2-x3ObCF7a6LuPWQPuE,38747
92
+ janus/prompts/prompt.py,sha256=vd7UbitF0VFCi21RsggDebD51xcuyls_lQLGKkphfI8,10578
83
93
  janus/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
94
  janus/utils/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
95
  janus/utils/_tests/test_logger.py,sha256=4jZFm8LX828Dt9lOjiFHZIPbxYy_hHaswyrMPkscgdM,2199
86
96
  janus/utils/_tests/test_progress.py,sha256=Yh5NDNq-24n2nhHHbJm39pENAH70PYnh9ymwdcn0_UU,481
87
- janus/utils/enums.py,sha256=SlZKHojLPYOSjuekQGirSHem5Etcgy57txCtVCej2Ag,27533
97
+ janus/utils/enums.py,sha256=AoilbdiYyMvY2Mp0AM4xlbLSELfut2XMwhIM1S_msP4,27610
88
98
  janus/utils/logger.py,sha256=KZeuaMAnlSZCsj4yL0P6N-JzZwpxXygzACWfdZFeuek,2337
89
99
  janus/utils/progress.py,sha256=pKcCzO9JOU9fSD7qTmLWcqY5smc8mujqQMXoPgqNysE,1458
90
- janus_llm-2.0.2.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
91
- janus_llm-2.0.2.dist-info/METADATA,sha256=sUtUMkjBZEmov6apazRphlqnBKG02dLkzWmq6-qIrqc,4184
92
- janus_llm-2.0.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
93
- janus_llm-2.0.2.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
94
- janus_llm-2.0.2.dist-info/RECORD,,
100
+ janus_llm-3.0.0.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
101
+ janus_llm-3.0.0.dist-info/METADATA,sha256=arXVqb1tVW76Bpjj1b1hq-rWsvu25DHg9rNv3ZaFZW0,4184
102
+ janus_llm-3.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
103
+ janus_llm-3.0.0.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
104
+ janus_llm-3.0.0.dist-info/RECORD,,
janus/converter.py DELETED
@@ -1,158 +0,0 @@
1
- import functools
2
- from typing import Any
3
-
4
- from langchain.schema.language_model import BaseLanguageModel
5
-
6
- from .language.binary import BinarySplitter
7
- from .language.mumps import MumpsSplitter
8
- from .language.splitter import Splitter
9
- from .language.treesitter import TreeSitterSplitter
10
- from .utils.enums import CUSTOM_SPLITTERS, LANGUAGES
11
- from .utils.logger import create_logger
12
-
13
- log = create_logger(__name__)
14
-
15
-
16
def run_if_changed(*tracked_vars):
    """Build a decorator that skips a method unless tracked attributes changed.

    The decorated method is only executed when at least one of the names in
    ``tracked_vars`` is present in the instance's ``_changed_attrs`` set.
    Otherwise the call is a no-op and ``None`` is returned.

    The instance must provide a ``_changed_attrs`` set, and its
    ``__setattr__`` must be overridden to record updated attribute names in
    that set.

    Arguments:
        *tracked_vars: Names of the instance attributes whose modification
            should allow the decorated method to run.

    Returns:
        A decorator for instance methods. When the wrapped method runs, its
        return value is passed back to the caller.
    """

    def wrapper(func):
        @functools.wraps(func)
        def wrapped(self, *args, **kwargs):
            # Nothing this method depends on has been modified: skip the call.
            if self._changed_attrs.isdisjoint(tracked_vars):
                return None
            # Propagate the result instead of silently discarding it.
            return func(self, *args, **kwargs)

        return wrapped

    return wrapper
33
-
34
-
35
class Converter:
    """Parent class that converts code into something else.

    Children will determine what the code gets converted into. Whether that's
    translated into another language, into pseudocode, requirements,
    documentation, etc., or converted into embeddings.

    Attribute writes are tracked in ``_changed_attrs`` (see ``__setattr__``) so
    that methods decorated with ``run_if_changed`` can skip work when none of
    the attributes they depend on have been modified.
    """

    def __init__(
        self,
        source_language: str = "fortran",
        max_tokens: None | int = None,
        protected_node_types: set[str] | list[str] | tuple[str, ...] = (),
        prune_node_types: set[str] | list[str] | tuple[str, ...] = (),
    ) -> None:
        """Initialize a Converter instance.

        Arguments:
            source_language: The source programming language. Must be a key of
                `janus.utils.enums.LANGUAGES` (case-insensitive).
            max_tokens: Forwarded to the splitter as its `max_tokens` argument.
            protected_node_types: Node types that aren't to be merged.
            prune_node_types: Node types which should be pruned.

        Raises:
            ValueError: If `source_language` is not found in `LANGUAGES`.
        """
        # Must be assigned first: __setattr__ records every later write here.
        self._changed_attrs: set = set()

        # Declared for readers and type checkers only; the first two are
        # assigned by set_source_language(), the splitter by _load_splitter().
        self._source_language: None | str
        self._source_glob: None | str
        self._protected_node_types: tuple[str, ...] = ()
        self._prune_node_types: tuple[str, ...] = ()
        self._splitter: None | Splitter
        self._llm: None | BaseLanguageModel = None
        self._max_tokens: None | int = max_tokens

        self.set_source_language(source_language)
        self.set_protected_node_types(protected_node_types)
        self.set_prune_node_types(prune_node_types)

        # Child classes must call self._load_parameters() themselves; it is
        # deliberately not invoked here.

    def __setattr__(self, key: Any, value: Any) -> None:
        """Set an attribute and record its name if the value is new or changed.

        Arguments:
            key: Name of the attribute being assigned.
            value: Value to assign.
        """
        if hasattr(self, "_changed_attrs"):
            if not hasattr(self, key) or getattr(self, key) != value:
                self._changed_attrs.add(key)
        # Avoid infinite recursion: only create the tracking set when the
        # attribute being assigned is not the tracking set itself.
        elif key != "_changed_attrs":
            self._changed_attrs = set()
        super().__setattr__(key, value)

    def _load_parameters(self) -> None:
        """Reload the splitter if needed, then mark all attributes as unchanged."""
        self._load_splitter()
        self._changed_attrs.clear()

    def set_source_language(self, source_language: str) -> None:
        """Validate and set the source language.

        The affected objects will not be updated until _load_parameters() is called.

        Arguments:
            source_language: The source programming language.

        Raises:
            ValueError: If the lower-cased `source_language` is not a key of
                `LANGUAGES`.
        """
        source_language = source_language.lower()
        if source_language not in LANGUAGES:
            raise ValueError(
                f"Invalid source language: {source_language}. "
                "Valid source languages are found in `janus.utils.enums.LANGUAGES`."
            )

        self._source_glob = f"**/*.{LANGUAGES[source_language]['suffix']}"
        self._source_language = source_language

    def set_protected_node_types(
        self, protected_node_types: set[str] | list[str] | tuple[str, ...]
    ) -> None:
        """Set the protected (non-mergeable) node types. This will often be structures
        like functions, classes, or modules which you might want to keep separate.

        The affected objects will not be updated until _load_parameters() is called.

        Arguments:
            protected_node_types: A set of node types that aren't to be merged.
                Duplicates are dropped; a falsy value yields an empty tuple.
        """
        self._protected_node_types = tuple(set(protected_node_types or []))

    def set_prune_node_types(
        self, prune_node_types: set[str] | list[str] | tuple[str, ...]
    ) -> None:
        """Set the node types to prune. This will often be structures
        like comments or whitespace which you might want to keep out of the LLM.

        The affected objects will not be updated until _load_parameters() is called.

        Arguments:
            prune_node_types: A set of node types which should be pruned.
                Duplicates are dropped; a falsy value yields an empty tuple.
        """
        self._prune_node_types = tuple(set(prune_node_types or []))

    @run_if_changed(
        "_source_language",
        "_max_tokens",
        "_llm",
        "_protected_node_types",
        "_prune_node_types",
    )
    def _load_splitter(self) -> None:
        """Load the splitter according to this instance's attributes.

        If the relevant fields have not been changed since the last time this method was
        called, nothing happens.
        """
        kwargs = dict(
            max_tokens=self._max_tokens,
            model=self._llm,
            protected_node_types=self._protected_node_types,
            prune_node_types=self._prune_node_types,
        )
        if self._source_language in CUSTOM_SPLITTERS:
            if self._source_language == "mumps":
                self._splitter = MumpsSplitter(**kwargs)
            elif self._source_language == "binary":
                self._splitter = BinarySplitter(**kwargs)
            # NOTE(review): any other language listed in CUSTOM_SPLITTERS falls
            # through here and leaves self._splitter untouched -- confirm that
            # CUSTOM_SPLITTERS only contains "mumps" and "binary".
        else:
            self._splitter = TreeSitterSplitter(language=self._source_language, **kwargs)