janus-llm 2.0.2__py3-none-any.whl → 3.0.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +2 -2
- janus/__main__.py +1 -1
- janus/_tests/test_cli.py +1 -2
- janus/cli.py +43 -51
- janus/converter/__init__.py +6 -0
- janus/converter/_tests/__init__.py +0 -0
- janus/{_tests → converter/_tests}/test_translate.py +11 -22
- janus/converter/converter.py +614 -0
- janus/converter/diagram.py +124 -0
- janus/converter/document.py +131 -0
- janus/converter/evaluate.py +15 -0
- janus/converter/requirements.py +50 -0
- janus/converter/translate.py +108 -0
- janus/embedding/_tests/test_collections.py +2 -2
- janus/language/_tests/test_splitter.py +1 -1
- janus/language/alc/__init__.py +1 -0
- janus/language/alc/_tests/__init__.py +0 -0
- janus/language/alc/_tests/test_alc.py +28 -0
- janus/language/alc/alc.py +87 -0
- janus/language/block.py +4 -2
- janus/language/combine.py +0 -1
- janus/language/mumps/mumps.py +2 -3
- janus/language/naive/__init__.py +1 -1
- janus/language/naive/basic_splitter.py +4 -4
- janus/language/naive/chunk_splitter.py +4 -4
- janus/language/naive/registry.py +1 -1
- janus/language/naive/simple_ast.py +23 -12
- janus/language/naive/tag_splitter.py +4 -4
- janus/language/splitter.py +10 -4
- janus/language/treesitter/treesitter.py +26 -8
- janus/llm/model_callbacks.py +34 -37
- janus/llm/models_info.py +16 -3
- janus/metrics/_tests/test_llm.py +2 -3
- janus/metrics/_tests/test_rouge_score.py +1 -1
- janus/metrics/_tests/test_similarity_score.py +1 -1
- janus/metrics/complexity_metrics.py +3 -4
- janus/metrics/metric.py +3 -4
- janus/metrics/reading.py +27 -5
- janus/prompts/prompt.py +67 -7
- janus/utils/enums.py +6 -5
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/METADATA +1 -1
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/RECORD +45 -35
- janus/converter.py +0 -158
- janus/translate.py +0 -981
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/LICENSE +0 -0
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/WHEEL +0 -0
- {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/entry_points.txt +0 -0
@@ -1,14 +1,21 @@
|
|
1
|
-
janus/__init__.py,sha256=
|
2
|
-
janus/__main__.py,sha256=
|
1
|
+
janus/__init__.py,sha256=LVDmiK9hI9u2qwCVVKQtBo3fR1FomBig_DphrdUtr3E,351
|
2
|
+
janus/__main__.py,sha256=lEkpNtLVPtFo8ySDZeXJ_NXDHb0GVdZFPWB4gD4RPS8,64
|
3
3
|
janus/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
janus/_tests/conftest.py,sha256=V7uW-oq3YbFiRPvrq15YoVVrA1n_83pjgiyTZ-IUGW8,963
|
5
|
-
janus/_tests/test_cli.py,sha256=
|
6
|
-
janus/
|
7
|
-
janus/
|
8
|
-
janus/converter.py,sha256=
|
5
|
+
janus/_tests/test_cli.py,sha256=mi7wAWV07ZFli5nQdExRGIGA3AMFD9s39-HcmDV4B6Y,4232
|
6
|
+
janus/cli.py,sha256=-aeg8R6CobK2EG_BPoZgBy_x1d6G9gp-KKKhnLMepo4,29541
|
7
|
+
janus/converter/__init__.py,sha256=kzVmWOPXRDayqqBZ8ZDaFQzA_q8PEdv407dc-DefPxY,255
|
8
|
+
janus/converter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
janus/converter/_tests/test_translate.py,sha256=eiLbmouokZrAeAYmdoJgnlx5-k4QiO6i0N6e6ZvZsvM,15885
|
10
|
+
janus/converter/converter.py,sha256=Bq07_9N_3Dv9NBqVACvb7LC2HxdQmfVZ1b0BlWrxjgo,23521
|
11
|
+
janus/converter/diagram.py,sha256=v-3ZZ4t1q74lDOjF2N6NRPkC3IK-sjLDn5_VChZTEGA,4608
|
12
|
+
janus/converter/document.py,sha256=hsW512veNjFWbdl5WriuUdNmMEqZy8ktRvqn9rRmA6E,4566
|
13
|
+
janus/converter/evaluate.py,sha256=APWQUY3gjAXqkJkPzvj0UA4wPK3Cv9QSJLM-YK9t-ng,476
|
14
|
+
janus/converter/requirements.py,sha256=orOVFymFSqSVyyII1DeDqeo3pTZLxw6dJ-JKBPigR0A,1751
|
15
|
+
janus/converter/translate.py,sha256=kMlGUiBYGQBXSxwX5in3CUyUifPM95wynCaRMxSDxMw,4238
|
9
16
|
janus/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
17
|
janus/embedding/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
-
janus/embedding/_tests/test_collections.py,sha256=
|
18
|
+
janus/embedding/_tests/test_collections.py,sha256=eT0cYv-qmPrHJRjDZqWPFTkqVzFDRoPrRKR__FPiz58,2651
|
12
19
|
janus/embedding/_tests/test_database.py,sha256=uqI2Jgj8DEIlciqiwiZx_n0osjcspIPrHOSSN1NRZSk,1019
|
13
20
|
janus/embedding/_tests/test_vectorize.py,sha256=NnJLHBwgMVycAProRJxuLVSByxrpJ35eaZCFca52gNY,1964
|
14
21
|
janus/embedding/collections.py,sha256=ZE8QGYQ82DCLqhV0m1y7PiqpuHjEfxHPcS5SCKU0LAw,5411
|
@@ -18,35 +25,39 @@ janus/embedding/vectorize.py,sha256=ap3e6ZMai8U3M5vdpLc_st4Sw31xyqoaqEno0IJlVOU,
|
|
18
25
|
janus/language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
26
|
janus/language/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
27
|
janus/language/_tests/test_combine.py,sha256=ydCYNbTxvaxT-5axiEBzPQLn6s4arSyZ5Tx2SYKLpJY,1830
|
21
|
-
janus/language/_tests/test_splitter.py,sha256=
|
28
|
+
janus/language/_tests/test_splitter.py,sha256=VK48eqp5PYJfjdhD_x7IkeAjbF1KC3AyNnICfK8XnUQ,360
|
29
|
+
janus/language/alc/__init__.py,sha256=j7vOMGhT1Vri6p8dsjSaY-fkO5uFn0sJ0nrNGGvcizM,42
|
30
|
+
janus/language/alc/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
|
+
janus/language/alc/_tests/test_alc.py,sha256=DttXpouP9Vkdlf23_a0qFalKdGtadGv6oXTsmN1pk8Q,994
|
32
|
+
janus/language/alc/alc.py,sha256=n8KVHTb6FFILw50N8UM3gfT60gLVvkTjk37easwluWs,3061
|
22
33
|
janus/language/binary/__init__.py,sha256=AlNAe12ZA366kcGSrQ1FJyOdbwxFqGBFkYR2K6yL818,51
|
23
34
|
janus/language/binary/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
24
35
|
janus/language/binary/_tests/test_binary.py,sha256=a-8RSfKA23UrJC9c1xPQK792XZCz8npCHI7isN2dAP8,1727
|
25
36
|
janus/language/binary/binary.py,sha256=CS1RAieN8klSsCeXQEFYKUWioatUX-sOPXKQr5S6NzE,6534
|
26
37
|
janus/language/binary/reveng/decompile_script.py,sha256=veW51oJzuO-4UD3Er062jXZ_FYtTFo9OCkl82Z2xr6A,2182
|
27
|
-
janus/language/block.py,sha256=
|
28
|
-
janus/language/combine.py,sha256=
|
38
|
+
janus/language/block.py,sha256=57hfOY-KSVMioKhkCvfDtovQt4h8lCg9cJbRF7ddV1s,9280
|
39
|
+
janus/language/combine.py,sha256=e7j8zQO_D3_LElaVCsGgtnzia7aFFK56m-mhArQBlR0,2908
|
29
40
|
janus/language/file.py,sha256=X2MYcAMlCABK77uhMdI_J2foXLrqEdinapYRfLPyKB8,563
|
30
41
|
janus/language/mumps/__init__.py,sha256=-Ou_wJ-JgHezfp1dub2_qCYNiK9wO-zo2MlqxM9qiwE,48
|
31
42
|
janus/language/mumps/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
43
|
janus/language/mumps/_tests/test_mumps.py,sha256=6l7q14lPnKf231iWwMdRbf-dg9QuHa26YMS7-K7yC4A,1001
|
33
|
-
janus/language/mumps/mumps.py,sha256=
|
44
|
+
janus/language/mumps/mumps.py,sha256=MkF_TZB1SOIj3JQfGKYow1Hh2Bja0EglUlpd4aAY5Iw,7351
|
34
45
|
janus/language/mumps/patterns.py,sha256=FW5T6Nt5kBO2UKgSL1KLVDbYRgMaJAzDvEmvBkxHppA,2310
|
35
|
-
janus/language/naive/__init__.py,sha256=
|
36
|
-
janus/language/naive/basic_splitter.py,sha256=
|
37
|
-
janus/language/naive/chunk_splitter.py,sha256=
|
38
|
-
janus/language/naive/registry.py,sha256=
|
39
|
-
janus/language/naive/simple_ast.py,sha256=
|
40
|
-
janus/language/naive/tag_splitter.py,sha256=
|
46
|
+
janus/language/naive/__init__.py,sha256=gsdC543qsIX8y_RxblCBIgyW0tfucljFms6v2WTrEz0,178
|
47
|
+
janus/language/naive/basic_splitter.py,sha256=NFW3TvMFQwEmcj5r4jvQXBJCzgNcSZI-3Arjb191gAo,407
|
48
|
+
janus/language/naive/chunk_splitter.py,sha256=g1nqbhvaOZ31SjO-smIwAg6lHGTy2rPOOnQ-m6fIKAA,713
|
49
|
+
janus/language/naive/registry.py,sha256=CDUkMIgscdPBV_qu49u9TGnOIgr9mRasinPRwViTWz8,281
|
50
|
+
janus/language/naive/simple_ast.py,sha256=boX_pJ8x52_MxiM6hJ-0oa6MR75Fu4pyebBEtYJJZUc,907
|
51
|
+
janus/language/naive/tag_splitter.py,sha256=6DHBJdM3IllcVV-MrAyj8KPg5zXTiRdeD42CYrmEFHk,1986
|
41
52
|
janus/language/node.py,sha256=-ymv--oILEYLVO2KSOrzOlzL2cZHNQpQJYwE1cKA-pY,200
|
42
|
-
janus/language/splitter.py,sha256=
|
53
|
+
janus/language/splitter.py,sha256=4XAe0hXka7njS30UHGCngJzDgHxn3lygUjikSHuV7Xo,16924
|
43
54
|
janus/language/treesitter/__init__.py,sha256=mUliw7ZJLZ8NkJKyUQMSoUV82hYXE0HvLHrEdGPJF4Q,43
|
44
55
|
janus/language/treesitter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
56
|
janus/language/treesitter/_tests/test_treesitter.py,sha256=nsavUV0aI6cpT9FkQve58eTTehLyQG6qJJBGlNa_bIw,2170
|
46
|
-
janus/language/treesitter/treesitter.py,sha256=
|
57
|
+
janus/language/treesitter/treesitter.py,sha256=UiV4OuWTt6IwMohHSw4FHsVNA_zxr9lNk4_Du09APdo,7509
|
47
58
|
janus/llm/__init__.py,sha256=8Pzn3Jdx867PzDc4xmwm8wvJDGzWSIhpN0NCEYFe0LQ,36
|
48
|
-
janus/llm/model_callbacks.py,sha256=
|
49
|
-
janus/llm/models_info.py,sha256=
|
59
|
+
janus/llm/model_callbacks.py,sha256=h_xlBAHRx-gxQBBjVKRpGXxdxYf6d9L6kBoXjbEAEdI,7106
|
60
|
+
janus/llm/models_info.py,sha256=B9Dn5mHc43OeZe5mHFj5wuhO194XHCTwShAa2ybnPyY,7688
|
50
61
|
janus/metrics/__init__.py,sha256=AsxtZJUzZiXJPr2ehPPltuYP-ddechjg6X85WZUO7mA,241
|
51
62
|
janus/metrics/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
63
|
janus/metrics/_tests/reference.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE8,13
|
@@ -54,19 +65,19 @@ janus/metrics/_tests/target.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE
|
|
54
65
|
janus/metrics/_tests/test_bleu.py,sha256=TcSnNGpMh00Nkkk1zq5wDfdCANMUq9eXscU_hcBRU8A,1640
|
55
66
|
janus/metrics/_tests/test_chrf.py,sha256=O4v1Cj513H8NYffJILpSI7CuR_dnm7F8CeB3C7sZYr0,2202
|
56
67
|
janus/metrics/_tests/test_file_pairing.py,sha256=A4Qy6JIesFXUcaig45Ze6LiViuHQS7MFSQzDHQP3j9w,1880
|
57
|
-
janus/metrics/_tests/test_llm.py,sha256=
|
68
|
+
janus/metrics/_tests/test_llm.py,sha256=dGXrdd79v-ix_560t6Q8RJEx-6mgZ-pkzJgm-O2ZBwA,2998
|
58
69
|
janus/metrics/_tests/test_reading.py,sha256=NDLFyjmOpM5gWf1LLTjGIw3aUR8Qf22zTt9hwe7NABs,840
|
59
|
-
janus/metrics/_tests/test_rouge_score.py,sha256=
|
60
|
-
janus/metrics/_tests/test_similarity_score.py,sha256=
|
70
|
+
janus/metrics/_tests/test_rouge_score.py,sha256=rcHmrpy55cW507PnTnGQnp9Tsn5rk7JEyXmusY7la3Q,2020
|
71
|
+
janus/metrics/_tests/test_similarity_score.py,sha256=jc3r0lWW5Iqm6AMKc36ewz5rboKwVw29fliBHClkzIg,799
|
61
72
|
janus/metrics/_tests/test_treesitter_metrics.py,sha256=tqpAg9LY811gfQ3n2ypRqBJesAFQodMf6Gz7dvOsqp4,4337
|
62
73
|
janus/metrics/bleu.py,sha256=eRoHIQulPp5mezJzHCNkwUB_89tAj4PqV2pF9eV9HfI,1746
|
63
74
|
janus/metrics/chrf.py,sha256=zNGWZ40CPMgj8rctnmwkbf25_PvSOLPbOjv-iN2cGXM,1472
|
64
75
|
janus/metrics/cli.py,sha256=Duuw2RF47Z-t1pal0cg3L_-N_91rx29krirqtIwjYLY,157
|
65
|
-
janus/metrics/complexity_metrics.py,sha256=
|
76
|
+
janus/metrics/complexity_metrics.py,sha256=1Z9n0o_CrILqayk40wRkjR1f7yvHIsJG38DxAbqj614,6560
|
66
77
|
janus/metrics/file_pairing.py,sha256=WNHRV1D8GOJMq8Pla5SPkTDAT7yVaS4-UU0XIGKvEVs,3729
|
67
78
|
janus/metrics/llm_metrics.py,sha256=3677S6GYcoVcokpmAN-fwvNu-lYWAKd7M5mebiE6RZc,5687
|
68
|
-
janus/metrics/metric.py,sha256=
|
69
|
-
janus/metrics/reading.py,sha256=
|
79
|
+
janus/metrics/metric.py,sha256=Lgdtq87oJ-kWC_6jdPQ6-d1MqoeTnhkRszo6IZJV6c0,16974
|
80
|
+
janus/metrics/reading.py,sha256=srLb2MO-vZL5ccRjaHz-dA4MwAvXVNyIKnOrvJXg77E,2244
|
70
81
|
janus/metrics/rouge_score.py,sha256=HfUJwUWI-yq5pOjML2ee4QTOMl0NQahnqEY2Mt8Dtnw,2865
|
71
82
|
janus/metrics/similarity.py,sha256=9pjWWpLKCsk0QfFfSgQNdPXiisqi7WJYOOHaiT8S0iY,1613
|
72
83
|
janus/metrics/splitting.py,sha256=610ScHRvALwdkqA6YyGI-tr3a18_cUofldBxGYX0SwE,968
|
@@ -78,17 +89,16 @@ janus/parsers/doc_parser.py,sha256=X8eCb1QXbL6sVWLEFGjsPyxrpJ9XnOPg7G4KZSo9A9E,5
|
|
78
89
|
janus/parsers/eval_parser.py,sha256=HB5-zY_Jpmkj6FDbuNCCVCRxwmzhViSAjPKbyyC0Ebc,2723
|
79
90
|
janus/parsers/reqs_parser.py,sha256=MFBvtR3otpyPZlkZxu0dVH1YeEJhvhNzhaGKGHaQVHA,2359
|
80
91
|
janus/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
81
|
-
janus/prompts/prompt.py,sha256=
|
82
|
-
janus/translate.py,sha256=bsQ1YvjCPrVrL3y-rAA2PrCv2-x3ObCF7a6LuPWQPuE,38747
|
92
|
+
janus/prompts/prompt.py,sha256=vd7UbitF0VFCi21RsggDebD51xcuyls_lQLGKkphfI8,10578
|
83
93
|
janus/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
84
94
|
janus/utils/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
85
95
|
janus/utils/_tests/test_logger.py,sha256=4jZFm8LX828Dt9lOjiFHZIPbxYy_hHaswyrMPkscgdM,2199
|
86
96
|
janus/utils/_tests/test_progress.py,sha256=Yh5NDNq-24n2nhHHbJm39pENAH70PYnh9ymwdcn0_UU,481
|
87
|
-
janus/utils/enums.py,sha256=
|
97
|
+
janus/utils/enums.py,sha256=AoilbdiYyMvY2Mp0AM4xlbLSELfut2XMwhIM1S_msP4,27610
|
88
98
|
janus/utils/logger.py,sha256=KZeuaMAnlSZCsj4yL0P6N-JzZwpxXygzACWfdZFeuek,2337
|
89
99
|
janus/utils/progress.py,sha256=pKcCzO9JOU9fSD7qTmLWcqY5smc8mujqQMXoPgqNysE,1458
|
90
|
-
janus_llm-
|
91
|
-
janus_llm-
|
92
|
-
janus_llm-
|
93
|
-
janus_llm-
|
94
|
-
janus_llm-
|
100
|
+
janus_llm-3.0.0.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
|
101
|
+
janus_llm-3.0.0.dist-info/METADATA,sha256=arXVqb1tVW76Bpjj1b1hq-rWsvu25DHg9rNv3ZaFZW0,4184
|
102
|
+
janus_llm-3.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
103
|
+
janus_llm-3.0.0.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
|
104
|
+
janus_llm-3.0.0.dist-info/RECORD,,
|
janus/converter.py
DELETED
@@ -1,158 +0,0 @@
|
|
1
|
-
import functools
|
2
|
-
from typing import Any
|
3
|
-
|
4
|
-
from langchain.schema.language_model import BaseLanguageModel
|
5
|
-
|
6
|
-
from .language.binary import BinarySplitter
|
7
|
-
from .language.mumps import MumpsSplitter
|
8
|
-
from .language.splitter import Splitter
|
9
|
-
from .language.treesitter import TreeSitterSplitter
|
10
|
-
from .utils.enums import CUSTOM_SPLITTERS, LANGUAGES
|
11
|
-
from .utils.logger import create_logger
|
12
|
-
|
13
|
-
log = create_logger(__name__)
|
14
|
-
|
15
|
-
|
16
|
-
def run_if_changed(*tracked_vars):
|
17
|
-
"""Wrapper to skip function calls if the given instance attributes haven't
|
18
|
-
been updated. Requires the _changed_attrs set to exist, and the __setattr__
|
19
|
-
method to be overridden to track parameter updates in _changed_attrs.
|
20
|
-
"""
|
21
|
-
|
22
|
-
def wrapper(func):
|
23
|
-
@functools.wraps(func)
|
24
|
-
def wrapped(self, *args, **kwargs):
|
25
|
-
# If there is overlap between the tracked variables and the changed
|
26
|
-
# ones, then call the function as normal
|
27
|
-
if self._changed_attrs.intersection(tracked_vars):
|
28
|
-
func(self, *args, **kwargs)
|
29
|
-
|
30
|
-
return wrapped
|
31
|
-
|
32
|
-
return wrapper
|
33
|
-
|
34
|
-
|
35
|
-
class Converter:
|
36
|
-
"""Parent class that converts code into something else.
|
37
|
-
|
38
|
-
Children will determine what the code gets converted into. Whether that's translated
|
39
|
-
into another language, into pseudocode, requirements, documentation, etc., or
|
40
|
-
converted into embeddings
|
41
|
-
"""
|
42
|
-
|
43
|
-
def __init__(
|
44
|
-
self,
|
45
|
-
source_language: str = "fortran",
|
46
|
-
max_tokens: None | int = None,
|
47
|
-
protected_node_types: set[str] | list[str] | tuple[str] = (),
|
48
|
-
prune_node_types: set[str] | list[str] | tuple[str] = (),
|
49
|
-
) -> None:
|
50
|
-
"""Initialize a Converter instance.
|
51
|
-
|
52
|
-
Arguments:
|
53
|
-
source_language: The source programming language.
|
54
|
-
parser_type: The type of parser to use for parsing the LLM output. Valid
|
55
|
-
values are `"code"`, `"text"`, `"eval"`, and `None` (default). If `None`,
|
56
|
-
the `Converter` assumes you won't be parsing an output (i.e., adding to an
|
57
|
-
embedding DB).
|
58
|
-
"""
|
59
|
-
self._changed_attrs: set = set()
|
60
|
-
|
61
|
-
self._source_language: None | str
|
62
|
-
self._source_glob: None | str
|
63
|
-
self._protected_node_types: tuple[str] = ()
|
64
|
-
self._prune_node_types: tuple[str] = ()
|
65
|
-
self._splitter: None | Splitter
|
66
|
-
self._llm: None | BaseLanguageModel = None
|
67
|
-
self._max_tokens: None | int = max_tokens
|
68
|
-
|
69
|
-
self.set_source_language(source_language)
|
70
|
-
self.set_protected_node_types(protected_node_types)
|
71
|
-
self.set_prune_node_types(prune_node_types)
|
72
|
-
|
73
|
-
# Child class must call this. Should we enforce somehow?
|
74
|
-
# self._load_parameters()
|
75
|
-
|
76
|
-
def __setattr__(self, key: Any, value: Any) -> None:
|
77
|
-
if hasattr(self, "_changed_attrs"):
|
78
|
-
if not hasattr(self, key) or getattr(self, key) != value:
|
79
|
-
self._changed_attrs.add(key)
|
80
|
-
# Avoid infinite recursion
|
81
|
-
elif key != "_changed_attrs":
|
82
|
-
self._changed_attrs = set()
|
83
|
-
super().__setattr__(key, value)
|
84
|
-
|
85
|
-
def _load_parameters(self) -> None:
|
86
|
-
self._load_splitter()
|
87
|
-
self._changed_attrs.clear()
|
88
|
-
|
89
|
-
def set_source_language(self, source_language: str) -> None:
|
90
|
-
"""Validate and set the source language.
|
91
|
-
|
92
|
-
The affected objects will not be updated until _load_parameters() is called.
|
93
|
-
|
94
|
-
Arguments:
|
95
|
-
source_language: The source programming language.
|
96
|
-
"""
|
97
|
-
source_language = source_language.lower()
|
98
|
-
if source_language not in LANGUAGES:
|
99
|
-
raise ValueError(
|
100
|
-
f"Invalid source language: {source_language}. "
|
101
|
-
"Valid source languages are found in `janus.utils.enums.LANGUAGES`."
|
102
|
-
)
|
103
|
-
|
104
|
-
self._source_glob = f"**/*.{LANGUAGES[source_language]['suffix']}"
|
105
|
-
self._source_language = source_language
|
106
|
-
|
107
|
-
def set_protected_node_types(
|
108
|
-
self, protected_node_types: set[str] | list[str] | tuple[str]
|
109
|
-
) -> None:
|
110
|
-
"""Set the protected (non-mergeable) node types. This will often be structures
|
111
|
-
like functions, classes, or modules which you might want to keep separate
|
112
|
-
|
113
|
-
The affected objects will not be updated until _load_parameters() is called.
|
114
|
-
|
115
|
-
Arguments:
|
116
|
-
protected_node_types: A set of node types that aren't to be merged
|
117
|
-
"""
|
118
|
-
self._protected_node_types = tuple(set(protected_node_types or []))
|
119
|
-
|
120
|
-
def set_prune_node_types(
|
121
|
-
self, prune_node_types: set[str] | list[str] | tuple[str]
|
122
|
-
) -> None:
|
123
|
-
"""Set the node types to prune. This will often be structures
|
124
|
-
like comments or whitespace which you might want to keep out of the LLM
|
125
|
-
|
126
|
-
The affected objects will not be updated until _load_parameters() is called.
|
127
|
-
|
128
|
-
Arguments:
|
129
|
-
prune_node_types: A set of node types which should be pruned
|
130
|
-
"""
|
131
|
-
self._prune_node_types = tuple(set(prune_node_types or []))
|
132
|
-
|
133
|
-
@run_if_changed(
|
134
|
-
"_source_language",
|
135
|
-
"_max_tokens",
|
136
|
-
"_llm",
|
137
|
-
"_protected_node_types",
|
138
|
-
"_prune_node_types",
|
139
|
-
)
|
140
|
-
def _load_splitter(self) -> None:
|
141
|
-
"""Load the splitter according to this instance's attributes.
|
142
|
-
|
143
|
-
If the relevant fields have not been changed since the last time this method was
|
144
|
-
called, nothing happens.
|
145
|
-
"""
|
146
|
-
kwargs = dict(
|
147
|
-
max_tokens=self._max_tokens,
|
148
|
-
model=self._llm,
|
149
|
-
protected_node_types=self._protected_node_types,
|
150
|
-
prune_node_types=self._prune_node_types,
|
151
|
-
)
|
152
|
-
if self._source_language in CUSTOM_SPLITTERS:
|
153
|
-
if self._source_language == "mumps":
|
154
|
-
self._splitter = MumpsSplitter(**kwargs)
|
155
|
-
elif self._source_language == "binary":
|
156
|
-
self._splitter = BinarySplitter(**kwargs)
|
157
|
-
else:
|
158
|
-
self._splitter = TreeSitterSplitter(language=self._source_language, **kwargs)
|