reproto 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .git/COMMIT_EDITMSG +1 -29
- .git/index +0 -0
- .git/logs/HEAD +2 -0
- .git/logs/refs/heads/iyue +2 -0
- .git/logs/refs/remotes/gitlab/iyue +2 -0
- .git/logs/refs/remotes/origin/iyue +2 -0
- .git/objects/15/eb3f02479e633439ec83c143e703f8448043a1 +0 -0
- .git/objects/26/1f67f3b731b32f6d77de9dd7be2d61e2a14ace +0 -0
- .git/objects/2e/2c1c42f5ac5d665cc672d3792078b756d9ab0e +0 -0
- .git/objects/35/8bace20b731ff1bbb256d2a0158dfc84720978 +0 -0
- .git/objects/4d/6d457bfabc4af842e5ddc2d56eb059d5dfdc9d +0 -0
- .git/objects/55/6723fdd4f525eed41c52fa80defca3f0c81c47 +0 -0
- .git/objects/65/a4f0ada7519f8b1e6a7c7e287541b8effde9fd +0 -0
- .git/objects/8d/44142ae2d6dbb59d4ebed8587bccd051e5766b +0 -0
- .git/objects/8d/4a5767bef0c342f1660526f9671c0944922c40 +0 -0
- .git/objects/95/295a15779ebefd563ec777c3d3cced7e8d0209 +0 -0
- .git/objects/db/beedb30613f79ae3ff67df1428cf8ade223711 +0 -0
- .git/objects/fc/e15b9dbffd9f37b1f2d46944ee2d0394df6565 +2 -0
- .git/refs/heads/iyue +1 -1
- .git/refs/remotes/gitlab/iyue +1 -1
- .git/refs/remotes/origin/iyue +1 -1
- core/reconstructor.py +52 -10
- generation/proto_generator.py +13 -7
- pyproject.toml +1 -1
- {reproto-0.0.7.dist-info → reproto-0.0.8.dist-info}/METADATA +1 -1
- {reproto-0.0.7.dist-info → reproto-0.0.8.dist-info}/RECORD +30 -16
- utils/file_cache.py +165 -0
- utils/type_index.py +341 -0
- {reproto-0.0.7.dist-info → reproto-0.0.8.dist-info}/WHEEL +0 -0
- {reproto-0.0.7.dist-info → reproto-0.0.8.dist-info}/entry_points.txt +0 -0
.git/COMMIT_EDITMSG
CHANGED
@@ -1,29 +1 @@
|
|
1
|
-
feat:
|
2
|
-
|
3
|
-
🎯 **核心改进**
|
4
|
-
- 删除google_protobuf_types.py,采用基于Java源码的动态类型识别
|
5
|
-
- 实现枚举类型的精确识别(Access、Gender、Badge等)
|
6
|
-
- 修复map类型和repeated类型的处理逻辑
|
7
|
-
- 增强错误处理和统计报告
|
8
|
-
|
9
|
-
🔧 **技术修复**
|
10
|
-
- JavaSourceAnalyzer: 增强setter方法分析,支持枚举类型推断
|
11
|
-
- InfoDecoder: 重构字段类型转换,优先使用Java源码信息
|
12
|
-
- ProtoReconstructor: 添加枚举解析器,完善依赖追踪
|
13
|
-
- ProtoGenerator: 修复基础类型判断,避免错误导入
|
14
|
-
|
15
|
-
🚀 **功能增强**
|
16
|
-
- 支持Internal.ProtobufList<T> -> repeated T 转换
|
17
|
-
- 支持MapFieldLite<K,V> -> map<k,v> 转换
|
18
|
-
- 支持Internal.IntList -> repeated enum 转换
|
19
|
-
- 增强包名匹配算法,避免第三方库误匹配
|
20
|
-
|
21
|
-
📊 **统计改进**
|
22
|
-
- 详细的成功/失败/跳过统计
|
23
|
-
- 错误原因分类和报告
|
24
|
-
- 支持verbose模式的详细日志
|
25
|
-
|
26
|
-
🐛 **Bug修复**
|
27
|
-
- 修复基础类型被误认为自定义类型的问题
|
28
|
-
- 修复map类型repeated规则错误
|
29
|
-
- 修复Unity3D等第三方库误匹配问题
|
1
|
+
feat(build): update version
|
.git/index
CHANGED
Binary file
|
.git/logs/HEAD
CHANGED
@@ -2,3 +2,5 @@
|
|
2
2
|
2e11d561668286f3d6d48f399ffab1943a5cdcb2 f22bfffda9fb6a7f37bed04b7c40c9466ef09454 iyue <ys1231@126.com> 1750925875 +0800 commit: feat: 完善Protobuf类型推断系统,支持map和未知类型处理
|
3
3
|
f22bfffda9fb6a7f37bed04b7c40c9466ef09454 e82f42ea26b8bf4f0bc92c0648ac8f190f14226d iyue <ys1231@126.com> 1750927650 +0800 commit: feat: rename reproto
|
4
4
|
e82f42ea26b8bf4f0bc92c0648ac8f190f14226d 9ae313cdf64cd82416c1238eb493e6396f799f12 iyue <ys1231@126.com> 1750942828 +0800 commit: feat: 重构字段类型识别系统,实现基于Java源码的精确类型解析
|
5
|
+
9ae313cdf64cd82416c1238eb493e6396f799f12 4d6d457bfabc4af842e5ddc2d56eb059d5dfdc9d iyue <ys1231@126.com> 1750946357 +0800 commit: feat: 优化proto生成性能和字段排序
|
6
|
+
4d6d457bfabc4af842e5ddc2d56eb059d5dfdc9d fce15b9dbffd9f37b1f2d46944ee2d0394df6565 iyue <ys1231@126.com> 1750946542 +0800 commit: feat(build): update version
|
.git/logs/refs/heads/iyue
CHANGED
@@ -2,3 +2,5 @@
|
|
2
2
|
2e11d561668286f3d6d48f399ffab1943a5cdcb2 f22bfffda9fb6a7f37bed04b7c40c9466ef09454 iyue <ys1231@126.com> 1750925875 +0800 commit: feat: 完善Protobuf类型推断系统,支持map和未知类型处理
|
3
3
|
f22bfffda9fb6a7f37bed04b7c40c9466ef09454 e82f42ea26b8bf4f0bc92c0648ac8f190f14226d iyue <ys1231@126.com> 1750927650 +0800 commit: feat: rename reproto
|
4
4
|
e82f42ea26b8bf4f0bc92c0648ac8f190f14226d 9ae313cdf64cd82416c1238eb493e6396f799f12 iyue <ys1231@126.com> 1750942828 +0800 commit: feat: 重构字段类型识别系统,实现基于Java源码的精确类型解析
|
5
|
+
9ae313cdf64cd82416c1238eb493e6396f799f12 4d6d457bfabc4af842e5ddc2d56eb059d5dfdc9d iyue <ys1231@126.com> 1750946357 +0800 commit: feat: 优化proto生成性能和字段排序
|
6
|
+
4d6d457bfabc4af842e5ddc2d56eb059d5dfdc9d fce15b9dbffd9f37b1f2d46944ee2d0394df6565 iyue <ys1231@126.com> 1750946542 +0800 commit: feat(build): update version
|
@@ -1,3 +1,5 @@
|
|
1
1
|
0000000000000000000000000000000000000000 f22bfffda9fb6a7f37bed04b7c40c9466ef09454 iyue <ys1231@126.com> 1750926057 +0800 update by push
|
2
2
|
f22bfffda9fb6a7f37bed04b7c40c9466ef09454 e82f42ea26b8bf4f0bc92c0648ac8f190f14226d iyue <ys1231@126.com> 1750927662 +0800 update by push
|
3
3
|
e82f42ea26b8bf4f0bc92c0648ac8f190f14226d 9ae313cdf64cd82416c1238eb493e6396f799f12 iyue <ys1231@126.com> 1750942950 +0800 update by push
|
4
|
+
9ae313cdf64cd82416c1238eb493e6396f799f12 4d6d457bfabc4af842e5ddc2d56eb059d5dfdc9d iyue <ys1231@126.com> 1750946407 +0800 update by push
|
5
|
+
4d6d457bfabc4af842e5ddc2d56eb059d5dfdc9d fce15b9dbffd9f37b1f2d46944ee2d0394df6565 iyue <ys1231@126.com> 1750946551 +0800 update by push
|
@@ -1,3 +1,5 @@
|
|
1
1
|
2e11d561668286f3d6d48f399ffab1943a5cdcb2 f22bfffda9fb6a7f37bed04b7c40c9466ef09454 iyue <ys1231@126.com> 1750925918 +0800 update by push
|
2
2
|
f22bfffda9fb6a7f37bed04b7c40c9466ef09454 e82f42ea26b8bf4f0bc92c0648ac8f190f14226d iyue <ys1231@126.com> 1750927669 +0800 update by push
|
3
3
|
e82f42ea26b8bf4f0bc92c0648ac8f190f14226d 9ae313cdf64cd82416c1238eb493e6396f799f12 iyue <ys1231@126.com> 1750942915 +0800 update by push
|
4
|
+
9ae313cdf64cd82416c1238eb493e6396f799f12 4d6d457bfabc4af842e5ddc2d56eb059d5dfdc9d iyue <ys1231@126.com> 1750946393 +0800 update by push
|
5
|
+
4d6d457bfabc4af842e5ddc2d56eb059d5dfdc9d fce15b9dbffd9f37b1f2d46944ee2d0394df6565 iyue <ys1231@126.com> 1750946560 +0800 update by push
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
.git/refs/heads/iyue
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
fce15b9dbffd9f37b1f2d46944ee2d0394df6565
|
.git/refs/remotes/gitlab/iyue
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
fce15b9dbffd9f37b1f2d46944ee2d0394df6565
|
.git/refs/remotes/origin/iyue
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
fce15b9dbffd9f37b1f2d46944ee2d0394df6565
|
core/reconstructor.py
CHANGED
@@ -31,6 +31,9 @@ class JavaSourceAnalyzer:
|
|
31
31
|
self._current_class_name = None
|
32
32
|
# 初始化JavaParser用于字段类型解析
|
33
33
|
self.java_parser = JavaParser()
|
34
|
+
# 使用文件缓存系统优化I/O性能
|
35
|
+
from utils.file_cache import get_file_cache
|
36
|
+
self.file_cache = get_file_cache()
|
34
37
|
|
35
38
|
def set_current_class(self, class_name: str):
|
36
39
|
"""设置当前分析的类"""
|
@@ -295,19 +298,21 @@ class JavaSourceAnalyzer:
|
|
295
298
|
return None
|
296
299
|
|
297
300
|
def _load_class_content(self, class_name: str) -> Optional[str]:
|
298
|
-
"""
|
301
|
+
"""加载类的源码内容(使用缓存优化)"""
|
299
302
|
try:
|
300
303
|
# 标准路径:com.example.Model -> com/example/Model.java
|
301
304
|
file_path = class_name.replace('.', '/') + '.java'
|
302
305
|
full_path = self.sources_dir / file_path
|
303
306
|
|
304
|
-
|
305
|
-
|
307
|
+
# 使用缓存系统获取文件内容
|
308
|
+
content = self.file_cache.get_content(full_path)
|
309
|
+
if content:
|
310
|
+
return content
|
306
311
|
|
307
312
|
# 备选方案:按简单类名搜索
|
308
313
|
simple_name = class_name.split('.')[-1]
|
309
314
|
for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
|
310
|
-
return
|
315
|
+
return self.file_cache.get_content(java_file)
|
311
316
|
|
312
317
|
return None
|
313
318
|
except Exception:
|
@@ -346,6 +351,10 @@ class ProtoReconstructor:
|
|
346
351
|
self.java_source_analyzer = JavaSourceAnalyzer(sources_dir)
|
347
352
|
self.info_decoder.java_source_analyzer = self.java_source_analyzer
|
348
353
|
|
354
|
+
# 初始化类型索引(延迟加载)
|
355
|
+
from utils.type_index import get_type_index
|
356
|
+
self.type_index = get_type_index(sources_dir)
|
357
|
+
|
349
358
|
# 任务调度状态
|
350
359
|
self.processed_classes: Set[str] = set() # 已处理的类
|
351
360
|
self.pending_classes: deque = deque() # 待处理的类队列
|
@@ -383,7 +392,15 @@ class ProtoReconstructor:
|
|
383
392
|
# 4. 生成proto文件
|
384
393
|
self._generate_all_proto_files()
|
385
394
|
|
386
|
-
# 5.
|
395
|
+
# 5. 输出性能统计信息
|
396
|
+
from utils.file_cache import get_file_cache
|
397
|
+
file_cache = get_file_cache()
|
398
|
+
file_cache.print_stats()
|
399
|
+
|
400
|
+
# 输出类型索引统计
|
401
|
+
self.type_index.print_stats()
|
402
|
+
|
403
|
+
# 6. 返回统计信息
|
387
404
|
# 报告未知类型统计
|
388
405
|
self._report_unknown_types()
|
389
406
|
|
@@ -913,7 +930,7 @@ class ProtoReconstructor:
|
|
913
930
|
|
914
931
|
def _find_best_matching_class(self, type_name: str, current_package: str, current_class: str = None) -> Optional[str]:
|
915
932
|
"""
|
916
|
-
|
933
|
+
查找最佳匹配的类(使用索引优化)
|
917
934
|
|
918
935
|
Args:
|
919
936
|
type_name: 类型名(如 IdData)
|
@@ -935,7 +952,28 @@ class ProtoReconstructor:
|
|
935
952
|
self.logger.info(f" 🔍 基础字段类型检测: {type_name} -> 跳过类匹配")
|
936
953
|
return None
|
937
954
|
|
938
|
-
#
|
955
|
+
# 使用类型索引进行快速匹配
|
956
|
+
best_match = self.type_index.find_best_match(type_name, current_package)
|
957
|
+
|
958
|
+
if best_match:
|
959
|
+
self.logger.info(f" 🔍 索引匹配: {type_name} -> {best_match}")
|
960
|
+
return best_match
|
961
|
+
|
962
|
+
# 索引未找到匹配,回退到传统方法(保留兼容性)
|
963
|
+
self.logger.debug(f" ⚠️ 索引未找到匹配,回退到目录扫描: {type_name}")
|
964
|
+
return self._fallback_directory_search(type_name, current_package)
|
965
|
+
|
966
|
+
def _fallback_directory_search(self, type_name: str, current_package: str) -> Optional[str]:
|
967
|
+
"""
|
968
|
+
回退的目录扫描方法(当索引匹配失败时使用)
|
969
|
+
|
970
|
+
Args:
|
971
|
+
type_name: 类型名
|
972
|
+
current_package: 当前包名
|
973
|
+
|
974
|
+
Returns:
|
975
|
+
匹配的类名或None
|
976
|
+
"""
|
939
977
|
matching_classes = []
|
940
978
|
|
941
979
|
# 在源码目录中搜索
|
@@ -961,7 +999,7 @@ class ProtoReconstructor:
|
|
961
999
|
matching_classes.sort(key=lambda x: x[1], reverse=True)
|
962
1000
|
best_match = matching_classes[0][0]
|
963
1001
|
|
964
|
-
self.logger.info(f" 🔍
|
1002
|
+
self.logger.info(f" 🔍 目录扫描匹配: {type_name} -> {best_match}")
|
965
1003
|
return best_match
|
966
1004
|
|
967
1005
|
def _is_basic_field_type(self, type_name: str, current_class: str = None) -> bool:
|
@@ -1125,8 +1163,12 @@ class ProtoReconstructor:
|
|
1125
1163
|
if not java_file:
|
1126
1164
|
return None
|
1127
1165
|
|
1128
|
-
#
|
1129
|
-
|
1166
|
+
# 使用缓存读取Java源码
|
1167
|
+
from utils.file_cache import get_file_cache
|
1168
|
+
file_cache = get_file_cache()
|
1169
|
+
content = file_cache.get_content(java_file)
|
1170
|
+
if not content:
|
1171
|
+
return None
|
1130
1172
|
|
1131
1173
|
# 查找字段声明模式:private SomeType fieldName_;
|
1132
1174
|
# 我们要找的是以inferred_type结尾的类型声明
|
generation/proto_generator.py
CHANGED
@@ -158,12 +158,13 @@ class ProtoGenerator:
|
|
158
158
|
"""
|
159
159
|
lines = [f'message {message_def.name} {{']
|
160
160
|
|
161
|
-
# 生成oneof
|
161
|
+
# 生成oneof字段(oneof字段内部也按tag排序)
|
162
162
|
for oneof in message_def.oneofs:
|
163
163
|
lines.extend(self._generate_oneof_definition(oneof))
|
164
164
|
|
165
|
-
#
|
166
|
-
|
165
|
+
# 生成常规字段(按tag排序)
|
166
|
+
sorted_fields = sorted(message_def.fields, key=lambda field: field.tag)
|
167
|
+
for field in sorted_fields:
|
167
168
|
lines.append(self._generate_field_definition(field))
|
168
169
|
|
169
170
|
lines.append('}')
|
@@ -181,19 +182,24 @@ class ProtoGenerator:
|
|
181
182
|
"""
|
182
183
|
lines = [f'enum {enum_def.name} {{']
|
183
184
|
|
184
|
-
#
|
185
|
-
|
185
|
+
# 生成枚举值(按value排序)
|
186
|
+
sorted_values = sorted(enum_def.values, key=lambda enum_value: enum_value.value)
|
187
|
+
for enum_value in sorted_values:
|
186
188
|
lines.append(f' {enum_value.name} = {enum_value.value};')
|
187
189
|
|
188
190
|
lines.append('}')
|
189
191
|
return lines
|
190
192
|
|
191
193
|
def _generate_oneof_definition(self, oneof) -> List[str]:
|
192
|
-
"""生成oneof
|
194
|
+
"""生成oneof字段定义(字段按tag排序)"""
|
193
195
|
lines = [f' oneof {oneof.name} {{']
|
194
|
-
|
196
|
+
|
197
|
+
# 对oneof内部的字段按tag排序
|
198
|
+
sorted_fields = sorted(oneof.fields, key=lambda field: field.tag)
|
199
|
+
for field in sorted_fields:
|
195
200
|
field_type = self._resolve_field_type(field)
|
196
201
|
lines.append(f' {field_type} {field.name} = {field.tag};')
|
202
|
+
|
197
203
|
lines.append(' }')
|
198
204
|
return lines
|
199
205
|
|
pyproject.toml
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
.git/COMMIT_EDITMSG,sha256=
|
1
|
+
.git/COMMIT_EDITMSG,sha256=OJTijUraf8zRGEsGyQ_Q3EFxeT206AEhvBZe9ph_cdI,28
|
2
2
|
.git/HEAD,sha256=ly8yuQLWQE8njkYAapHLPJ1xVg6_fMgQjCoHqKvFIdw,21
|
3
3
|
.git/config,sha256=9ZAySfDWBdI-K9eOXyrEQOhiX68AiaP0Z4CLzH07H10,399
|
4
4
|
.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
|
@@ -16,27 +16,37 @@
|
|
16
16
|
.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO7nXQI7DsSv18,2783
|
17
17
|
.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
|
18
18
|
.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
|
19
|
-
.git/index,sha256=
|
19
|
+
.git/index,sha256=B1Rzt_0WDkah8dDUat5W191JgMFWphX94SqRGFNaulo,2167
|
20
20
|
.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
|
21
|
-
.git/logs/HEAD,sha256=
|
22
|
-
.git/logs/refs/heads/iyue,sha256=
|
23
|
-
.git/logs/refs/remotes/gitlab/iyue,sha256=
|
21
|
+
.git/logs/HEAD,sha256=xX-tAof-O9a3m9JXQU75r2QrQBSxtRQ3Vj2UoNr_qxM,1057
|
22
|
+
.git/logs/refs/heads/iyue,sha256=xX-tAof-O9a3m9JXQU75r2QrQBSxtRQ3Vj2UoNr_qxM,1057
|
23
|
+
.git/logs/refs/remotes/gitlab/iyue,sha256=UPB7A4ZXzkMHpkWp5AG846jGDLEBUY_hr0fMbb4h_V0,680
|
24
24
|
.git/logs/refs/remotes/origin/HEAD,sha256=4vCPTgC0N80jhmkFXNUlYvHtGllYJkQTGZEn6LlqZgs,163
|
25
|
-
.git/logs/refs/remotes/origin/iyue,sha256=
|
25
|
+
.git/logs/refs/remotes/origin/iyue,sha256=J3u7O9zrNOAvsYpwKq0BCIPq1sDWaw7mTbhPD-tAxog,680
|
26
26
|
.git/objects/09/a92517fe9eeb33d2fd7c979e01d163665f7abc,sha256=wyFL5jJ16f0xCpL1MB5EiJvs2opieE0hMEmyDpUV3ps,9356
|
27
27
|
.git/objects/14/d323d58fb90c209a730c17e23fd82a6c735fc1,sha256=rTCuhPM5LYusLIMDv7CLC557SlRFvvbBzgr4jwrjMVg,426
|
28
|
+
.git/objects/15/eb3f02479e633439ec83c143e703f8448043a1,sha256=duJXtvq_NNhYKG7WdejXgoMlyHIaCUG5jv9SJpU_orc,14711
|
28
29
|
.git/objects/18/89a4fb55eb3abdc528ce87f0cea039278c06fd,sha256=Uz8TPtDNnPWR5pfsIx7tMDCDFBdWLIkaPcnYPIJ12oM,2880
|
29
30
|
.git/objects/20/cf56ec106bcd66420dd000279f983571b918b6,sha256=0dwZwG7CpR7yYYNvgYDxHLPj2P7eRLK7fTW_En419pk,166
|
30
31
|
.git/objects/21/55b64d52922c88527c102d62f23e5c2abbae79,sha256=6LLW2HrSfqqVwvO59yc-EcUp6MHw6zfHwu8TB775dII,13853
|
31
32
|
.git/objects/23/bfbca38e177bcbb423dd782c35b19d127a5ab0,sha256=hYW2b3t3bVGNh4pqyyA7DMlyF4LfNZco5NwedWMwwuI,3700
|
33
|
+
.git/objects/26/1f67f3b731b32f6d77de9dd7be2d61e2a14ace,sha256=mcasCBpCulVy2N-X_d2hg8567PdhZRof4ea5nYoelJM,96
|
34
|
+
.git/objects/2e/2c1c42f5ac5d665cc672d3792078b756d9ab0e,sha256=5LG3jHGORJeswNw0lz5dc6QSsxztyMstbRDADhZDnZY,166
|
32
35
|
.git/objects/33/181441ab38eded005db356da89b54c7d29f452,sha256=iIM6a2aGs_TGZjxHKnkYYyZj_hA77nAF7r2lMOtN1VA,2021
|
33
36
|
.git/objects/33/52dfa8f5d9eb46cc98ea7ccecf02e4d9df95f7,sha256=KNAvQNdSalt8CxCZVVb8e6KBC4zqVtusT5MriFLPbIQ,4409
|
37
|
+
.git/objects/35/8bace20b731ff1bbb256d2a0158dfc84720978,sha256=hEPGT0bRzT1GlXErWRWJ7ogflTWj3SutlHRKCBH1TII,426
|
34
38
|
.git/objects/3c/6f0120229cc2cd8123efbeb7f186eb0a485f29,sha256=iyh7Xt-m7vou3slm58k8xSl3Cn2yYXTuv5WziSWHrW0,802
|
35
39
|
.git/objects/40/84f4567d983a977c49598b7d886e46b13ff50b,sha256=R3w6EvxpJRcVuJvbCslrGDK07kcOjECGGosj5tO-JBo,426
|
36
40
|
.git/objects/48/369b05749e384be9be58e5f943f3a0040d0f37,sha256=KP9dXx-XniZuIEXESlrL_MWWi55zGiFr7xtmp3oD67o,8551
|
41
|
+
.git/objects/4d/6d457bfabc4af842e5ddc2d56eb059d5dfdc9d,sha256=HLQ8aGvln8ZuhdINhhbr_aTt2BttkJOY8k6LhciAMLE,672
|
42
|
+
.git/objects/55/6723fdd4f525eed41c52fa80defca3f0c81c47,sha256=vwGNz5lxU_wrrRWVPU64TccYo4Vo4uUGC3-GwVIs1xs,4520
|
43
|
+
.git/objects/65/a4f0ada7519f8b1e6a7c7e287541b8effde9fd,sha256=jSGkVjAMcm2nrF6w_0U7lmzoBgwIJaf4P8POOlABxEQ,188
|
37
44
|
.git/objects/76/311aa8e59d780763e0d66787067cc5d9613a67,sha256=J9Xy8ESXbWUbC_4sA0KumGVKm09tzlqA8C5oJ63a_b4,2167
|
38
45
|
.git/objects/7c/00eec7ae9ef5f94fc337e5c8f9793a2a48810d,sha256=Ju-Yuy5EARcIThU4vkHrZuHQWu3MjXSZYGtBDTWR18c,96
|
39
46
|
.git/objects/8c/809c42c7ae13007fd885ee7bcffae7acf2c520,sha256=HYoU3_cZUGur3rWrgRvzHRAv0JfrXgc34R8UeGbC5Bc,14180
|
47
|
+
.git/objects/8d/44142ae2d6dbb59d4ebed8587bccd051e5766b,sha256=l1G5ZzQBQW8Lxd3WvGLRcY6quM6dBL4zdNT5ovjv3Dc,802
|
48
|
+
.git/objects/8d/4a5767bef0c342f1660526f9671c0944922c40,sha256=kP0nfEnyNmuEJ4wLQabhPPx60OIa5Dxqn7R9Ni89CqY,1900
|
49
|
+
.git/objects/95/295a15779ebefd563ec777c3d3cced7e8d0209,sha256=IIN8_K0K-3LAkUI4Ra3DS8yUsfpsqteSDGhgvRpSfZ4,426
|
40
50
|
.git/objects/97/56fe0931216a7c40cbf250e1ab8a6dfd589f13,sha256=Su7Q-bXO0K9QKEGC249NVuUS62xmQs5TgWfEmdbauaI,97
|
41
51
|
.git/objects/9a/e313cdf64cd82416c1238eb493e6396f799f12,sha256=9bPRhebwMVqtGqJUxfVq-TEUxceuOdSvicWe1H8F7Ks,876
|
42
52
|
.git/objects/a3/cedc28e563a1845a7860161b39b0fe58d5f0d3,sha256=Vee0H8dhSj4ERNDSAnRU6vebs0tbY1-lgYr1Nz5X7bc,122
|
@@ -44,6 +54,7 @@
|
|
44
54
|
.git/objects/cd/2d6c229438c6b1c694b9392a85888d89ef49c1,sha256=IW9iGk3Eb-i_Ht64HQcltVp27ziQHz5IP6k5p57YFtY,122
|
45
55
|
.git/objects/d0/9c84ad2142a187bf26a1714b7041b62e404c8f,sha256=twhhaZbfU_mgu5LIlt9ZeGjp_PT_gri_9H231H5nrNc,2172
|
46
56
|
.git/objects/d9/6d7456245232b3e159dcf691f11e51224c557a,sha256=3w45hxiT2rn6ljSdgo36fHMMWZt30r5C1FPvbnjq9tc,597
|
57
|
+
.git/objects/db/beedb30613f79ae3ff67df1428cf8ade223711,sha256=tTGayiwSvdy-uBiXQKRhNnToZ_dc1BmeXZ2mltuRfxY,3812
|
47
58
|
.git/objects/e8/1433b6ad92206cdadbee1f474b4f99383314cb,sha256=mRMdmx6wCs6b-63QL72Iutm2AUVCOIYs7J5XbAsLluc,426
|
48
59
|
.git/objects/e8/2f42ea26b8bf4f0bc92c0648ac8f190f14226d,sha256=xtbEwloZnqXdNdViAevzyprosfPQ1P--xAu6URGlaYI,155
|
49
60
|
.git/objects/e9/a15996cb55ac72aeb6611d26e8d22246589943,sha256=sTn6X3xMp3_WYpj1CmP2hDqviX86aJHTCzzi_ZTpBKw,4643
|
@@ -52,14 +63,15 @@
|
|
52
63
|
.git/objects/f2/2bfffda9fb6a7f37bed04b7c40c9466ef09454,sha256=goQNyEV3NqzS28mMXuus1SWzwb8yXnvEPef14Ugzh5Q,750
|
53
64
|
.git/objects/f5/18c69a6e1bf3052b79da01502b2837ea58f0f4,sha256=r98SFbCvw7yeAYTjzkw-2VMhOVbpm79FLZQiC5A1-pw,123
|
54
65
|
.git/objects/f7/25a430eb3364460ba854dbc8809edc21dc6c70,sha256=LdyMmAWGLWbn5G5fC7eAPi4NFOboKcJ6iJJdxENSUCk,2505
|
66
|
+
.git/objects/fc/e15b9dbffd9f37b1f2d46944ee2d0394df6565,sha256=81qdzKkVZ5C_1oej2Mm0q2RLifilVFolIs95mrUfpg4,163
|
55
67
|
.git/objects/pack/pack-289f7bb06603881c49190e6036de6390223baf77.idx,sha256=KRv1SBEe9bYL9d5V301LwPOBqFs3tMnB0dl4PriMMIw,2416
|
56
68
|
.git/objects/pack/pack-289f7bb06603881c49190e6036de6390223baf77.pack,sha256=R4oMXhr-NcPAUBXStCbAaUvGUDW_SBECmPIbiB9Gy48,42003
|
57
69
|
.git/objects/pack/pack-289f7bb06603881c49190e6036de6390223baf77.rev,sha256=22_3sqyTNB5IbwAK5oCqNz0UxDDqoZ5jcF7p_44Lalo,244
|
58
70
|
.git/packed-refs,sha256=4H0m4wd6q98wZkBk7WfvVeTBuuxQdTE65XxsswYP-oQ,112
|
59
|
-
.git/refs/heads/iyue,sha256=
|
60
|
-
.git/refs/remotes/gitlab/iyue,sha256=
|
71
|
+
.git/refs/heads/iyue,sha256=3yox20YC-z9tSZ5Qrjy3DA1_ZXUQcLCKXhdXBcw7sno,41
|
72
|
+
.git/refs/remotes/gitlab/iyue,sha256=3yox20YC-z9tSZ5Qrjy3DA1_ZXUQcLCKXhdXBcw7sno,41
|
61
73
|
.git/refs/remotes/origin/HEAD,sha256=G8pFPTbCqnJ2IkCzz9J-STqOXWU6TrlXfpt1wx5bUWE,30
|
62
|
-
.git/refs/remotes/origin/iyue,sha256=
|
74
|
+
.git/refs/remotes/origin/iyue,sha256=3yox20YC-z9tSZ5Qrjy3DA1_ZXUQcLCKXhdXBcw7sno,41
|
63
75
|
.gitignore,sha256=ThRUL1E38Z_Q4Lww0xYYl9ziFaPUvvwwqxbmp2cuwco,2068
|
64
76
|
.python-version,sha256=NxOs_9lT1XG8y-FjlRru-YinX5RcBJt_ulPwgDESZ_o,7
|
65
77
|
ARCHITECTURE.md,sha256=JALnUdREwdLRAmcUit5CGAPLpeZ6UfoYpAQkxil3NJc,8322
|
@@ -67,21 +79,23 @@ README.md,sha256=1EC39Oeth4evP26KNWEMFG-hSrFNhJIAVOSSOBhc9cY,4244
|
|
67
79
|
core/__init__.py,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
|
68
80
|
core/bytecode_parser.py,sha256=87ZGhnyBNGP-gRjIyRUcGfWS2HR1YA_e1OBKafLEEDc,11532
|
69
81
|
core/info_decoder.py,sha256=wQMZoiQ7adK1xrAhZ0GOIqeLzvf3-TO7_EAERy4EX6g,52682
|
70
|
-
core/reconstructor.py,sha256=
|
82
|
+
core/reconstructor.py,sha256=RAA8ugrzNWg6Yc3cXu_i3ietc8aNzwh2ocGHTsEVgp8,53354
|
71
83
|
generation/__init__.py,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
|
72
|
-
generation/proto_generator.py,sha256=
|
84
|
+
generation/proto_generator.py,sha256=aeqKebkZ-GZAFSt2WWApC0CB3rk8iiaxYeVDm4ZD6E0,17264
|
73
85
|
main.py,sha256=3G649-aTByEjMS4tb1A9wcO9NVUxXVSvdwGTq9hXxBI,7081
|
74
86
|
models/__init__.py,sha256=WScv63rvEl65y5CWjpb6__hvjNvjpCkl6lz1Z2u0IYc,811
|
75
87
|
models/message_definition.py,sha256=AszUZnNPSBn9SMXne5ORDBiGZz1W2pcYmU8ftGC3Mks,4873
|
76
88
|
parsing/__init__.py,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
|
77
89
|
parsing/enum_parser.py,sha256=4BjMk1NIYgt2FDZUjTZyOvFfC1uJp_MBZipVdk1tqes,6175
|
78
90
|
parsing/java_parser.py,sha256=ECaiU9cZMNUZqoV1mrKRvaSFjZXha-lVgB59w6MuwxY,14761
|
79
|
-
pyproject.toml,sha256=
|
91
|
+
pyproject.toml,sha256=U3UQ2y7O9hz_q3yMTFvBulIUdAPNroQzrVCIr4LciCc,1378
|
80
92
|
requirements.txt,sha256=cQGj3IS6Kj88jbwj_jeKkokMnG-fEezWve91mfW4CJs,96
|
81
93
|
utils/__init__.py,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
|
94
|
+
utils/file_cache.py,sha256=eOBkfrumWg0EU1EfLESnRN1kcidi_MocxV_EWP8lEhQ,5207
|
82
95
|
utils/file_utils.py,sha256=N1Ei7hmyeOkIyZJFEXyNbtTrfeVF2hP-U8evXAVW2MA,4085
|
83
96
|
utils/logger.py,sha256=473DfzFVXzdGpiRTJGY6bBd-V5G80_P07gQqcvDChpQ,2447
|
84
|
-
|
85
|
-
reproto-0.0.
|
86
|
-
reproto-0.0.
|
87
|
-
reproto-0.0.
|
97
|
+
utils/type_index.py,sha256=So8FYyGtR0ff-ZhVHjXUdhXiXQEujlcCEJlj7OPQMsg,12128
|
98
|
+
reproto-0.0.8.dist-info/METADATA,sha256=Eo2Hrl1W_fX0u4AaPgKXk2Im_hYvu_snqG1HyTqOz_Y,5193
|
99
|
+
reproto-0.0.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
100
|
+
reproto-0.0.8.dist-info/entry_points.txt,sha256=6Oro9lK_2DXDgHiB3andNuIE78wxfooqacqp8yY1C-g,37
|
101
|
+
reproto-0.0.8.dist-info/RECORD,,
|
utils/file_cache.py
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
"""
|
2
|
+
文件内容缓存系统
|
3
|
+
|
4
|
+
用于缓存Java源码文件内容,避免重复的文件I/O操作
|
5
|
+
这是解决reproto性能瓶颈的核心优化组件
|
6
|
+
|
7
|
+
Author: AI Assistant
|
8
|
+
"""
|
9
|
+
|
10
|
+
from pathlib import Path
|
11
|
+
from typing import Optional, Dict
|
12
|
+
import threading
|
13
|
+
from utils.logger import get_logger
|
14
|
+
|
15
|
+
|
16
|
+
class FileContentCache:
|
17
|
+
"""
|
18
|
+
文件内容缓存系统
|
19
|
+
|
20
|
+
提供线程安全的文件内容缓存,显著减少重复的文件I/O操作
|
21
|
+
特别适用于需要多次读取同一Java文件的场景
|
22
|
+
"""
|
23
|
+
|
24
|
+
def __init__(self):
|
25
|
+
"""初始化缓存系统"""
|
26
|
+
self._cache: Dict[str, str] = {}
|
27
|
+
self._stats = {
|
28
|
+
'hits': 0,
|
29
|
+
'misses': 0,
|
30
|
+
'errors': 0
|
31
|
+
}
|
32
|
+
self._lock = threading.RLock() # 使用可重入锁
|
33
|
+
self.logger = get_logger("file_cache")
|
34
|
+
|
35
|
+
def get_content(self, file_path: Path) -> Optional[str]:
|
36
|
+
"""
|
37
|
+
获取文件内容,优先从缓存读取
|
38
|
+
|
39
|
+
Args:
|
40
|
+
file_path: 文件路径
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
文件内容字符串,如果读取失败则返回None
|
44
|
+
"""
|
45
|
+
cache_key = str(file_path.resolve())
|
46
|
+
|
47
|
+
with self._lock:
|
48
|
+
# 检查缓存
|
49
|
+
if cache_key in self._cache:
|
50
|
+
self._stats['hits'] += 1
|
51
|
+
return self._cache[cache_key]
|
52
|
+
|
53
|
+
# 缓存未命中,读取文件
|
54
|
+
try:
|
55
|
+
if not file_path.exists():
|
56
|
+
self._stats['errors'] += 1
|
57
|
+
return None
|
58
|
+
|
59
|
+
content = file_path.read_text(encoding='utf-8')
|
60
|
+
self._cache[cache_key] = content
|
61
|
+
self._stats['misses'] += 1
|
62
|
+
|
63
|
+
# 定期输出缓存统计
|
64
|
+
total_requests = self._stats['hits'] + self._stats['misses']
|
65
|
+
if total_requests % 50 == 0 and total_requests > 0:
|
66
|
+
hit_rate = self._stats['hits'] / total_requests * 100
|
67
|
+
self.logger.debug(f"📊 缓存统计: {total_requests} 次请求, 命中率 {hit_rate:.1f}%")
|
68
|
+
|
69
|
+
return content
|
70
|
+
|
71
|
+
except Exception as e:
|
72
|
+
self._stats['errors'] += 1
|
73
|
+
self.logger.warning(f"⚠️ 读取文件失败 {file_path}: {e}")
|
74
|
+
return None
|
75
|
+
|
76
|
+
def preload_files(self, file_paths: list[Path]) -> int:
|
77
|
+
"""
|
78
|
+
预加载文件列表到缓存
|
79
|
+
|
80
|
+
Args:
|
81
|
+
file_paths: 要预加载的文件路径列表
|
82
|
+
|
83
|
+
Returns:
|
84
|
+
成功预加载的文件数量
|
85
|
+
"""
|
86
|
+
loaded_count = 0
|
87
|
+
|
88
|
+
for file_path in file_paths:
|
89
|
+
if self.get_content(file_path) is not None:
|
90
|
+
loaded_count += 1
|
91
|
+
|
92
|
+
self.logger.info(f"📁 预加载完成: {loaded_count}/{len(file_paths)} 个文件")
|
93
|
+
return loaded_count
|
94
|
+
|
95
|
+
def clear_cache(self):
|
96
|
+
"""清空缓存"""
|
97
|
+
with self._lock:
|
98
|
+
self._cache.clear()
|
99
|
+
self.logger.info("🗑️ 缓存已清空")
|
100
|
+
|
101
|
+
def get_stats(self) -> dict:
|
102
|
+
"""
|
103
|
+
获取缓存统计信息
|
104
|
+
|
105
|
+
Returns:
|
106
|
+
包含缓存统计的字典
|
107
|
+
"""
|
108
|
+
with self._lock:
|
109
|
+
total_requests = self._stats['hits'] + self._stats['misses']
|
110
|
+
hit_rate = (self._stats['hits'] / total_requests * 100) if total_requests > 0 else 0
|
111
|
+
|
112
|
+
return {
|
113
|
+
'total_requests': total_requests,
|
114
|
+
'cache_hits': self._stats['hits'],
|
115
|
+
'cache_misses': self._stats['misses'],
|
116
|
+
'hit_rate_percent': hit_rate,
|
117
|
+
'errors': self._stats['errors'],
|
118
|
+
'cached_files': len(self._cache)
|
119
|
+
}
|
120
|
+
|
121
|
+
def print_stats(self):
|
122
|
+
"""打印缓存统计信息"""
|
123
|
+
stats = self.get_stats()
|
124
|
+
|
125
|
+
self.logger.info("📊 文件缓存统计:")
|
126
|
+
self.logger.info(f" 总请求数: {stats['total_requests']}")
|
127
|
+
self.logger.info(f" 缓存命中: {stats['cache_hits']}")
|
128
|
+
self.logger.info(f" 缓存未命中: {stats['cache_misses']}")
|
129
|
+
self.logger.info(f" 命中率: {stats['hit_rate_percent']:.1f}%")
|
130
|
+
self.logger.info(f" 错误数: {stats['errors']}")
|
131
|
+
self.logger.info(f" 已缓存文件: {stats['cached_files']}")
|
132
|
+
|
133
|
+
# 计算性能提升
|
134
|
+
if stats['cache_hits'] > 0:
|
135
|
+
io_saved = stats['cache_hits']
|
136
|
+
self.logger.info(f" 🚀 节省I/O操作: {io_saved} 次")
|
137
|
+
|
138
|
+
|
139
|
+
# 全局缓存实例
|
140
|
+
_global_cache = None
|
141
|
+
_cache_lock = threading.Lock()
|
142
|
+
|
143
|
+
|
144
|
+
def get_file_cache() -> FileContentCache:
|
145
|
+
"""
|
146
|
+
获取全局文件缓存实例(单例模式)
|
147
|
+
|
148
|
+
Returns:
|
149
|
+
FileContentCache实例
|
150
|
+
"""
|
151
|
+
global _global_cache
|
152
|
+
|
153
|
+
if _global_cache is None:
|
154
|
+
with _cache_lock:
|
155
|
+
if _global_cache is None:
|
156
|
+
_global_cache = FileContentCache()
|
157
|
+
|
158
|
+
return _global_cache
|
159
|
+
|
160
|
+
|
161
|
+
def clear_global_cache():
|
162
|
+
"""清空全局缓存"""
|
163
|
+
global _global_cache
|
164
|
+
if _global_cache is not None:
|
165
|
+
_global_cache.clear_cache()
|
utils/type_index.py
ADDED
@@ -0,0 +1,341 @@
|
|
1
|
+
"""
|
2
|
+
类型匹配索引系统
|
3
|
+
|
4
|
+
用于快速查找Java类型,避免重复的目录扫描操作
|
5
|
+
这是解决reproto性能瓶颈的第二个核心优化组件
|
6
|
+
|
7
|
+
Author: AI Assistant
|
8
|
+
"""
|
9
|
+
|
10
|
+
from pathlib import Path
|
11
|
+
from typing import Dict, List, Optional, Set
|
12
|
+
import threading
|
13
|
+
from utils.logger import get_logger
|
14
|
+
|
15
|
+
|
16
|
+
class TypeMatchingIndex:
|
17
|
+
"""
|
18
|
+
类型匹配索引系统
|
19
|
+
|
20
|
+
构建从类型名到完整类名的多级索引,支持:
|
21
|
+
- 精确匹配(完整类名)
|
22
|
+
- 简单名匹配(类名)
|
23
|
+
- 后缀匹配(如 IdData -> ContactIdData)
|
24
|
+
- 包名相似度匹配
|
25
|
+
"""
|
26
|
+
|
27
|
+
def __init__(self, sources_dir: Path):
|
28
|
+
"""
|
29
|
+
初始化索引系统
|
30
|
+
|
31
|
+
Args:
|
32
|
+
sources_dir: Java源码根目录
|
33
|
+
"""
|
34
|
+
self.sources_dir = sources_dir
|
35
|
+
self.logger = get_logger("type_index")
|
36
|
+
|
37
|
+
# 多级索引结构
|
38
|
+
self._exact_index: Dict[str, str] = {} # 完整类名 -> 完整类名
|
39
|
+
self._simple_index: Dict[str, List[str]] = {} # 简单类名 -> [完整类名列表]
|
40
|
+
self._suffix_index: Dict[str, List[str]] = {} # 后缀 -> [完整类名列表]
|
41
|
+
self._package_index: Dict[str, List[str]] = {} # 包名 -> [完整类名列表]
|
42
|
+
|
43
|
+
# 基础类型集合(快速过滤)
|
44
|
+
self._basic_types: Set[str] = {
|
45
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
46
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
47
|
+
'Byte', 'Short', 'Character', 'List', 'Map', 'Set', 'Collection'
|
48
|
+
}
|
49
|
+
|
50
|
+
# 索引统计
|
51
|
+
self._stats = {
|
52
|
+
'total_classes': 0,
|
53
|
+
'index_hits': 0,
|
54
|
+
'index_misses': 0,
|
55
|
+
'basic_type_skips': 0
|
56
|
+
}
|
57
|
+
|
58
|
+
self._lock = threading.RLock()
|
59
|
+
self._is_built = False
|
60
|
+
|
61
|
+
def build_index(self) -> None:
|
62
|
+
"""构建所有索引"""
|
63
|
+
if self._is_built:
|
64
|
+
return
|
65
|
+
|
66
|
+
with self._lock:
|
67
|
+
if self._is_built:
|
68
|
+
return
|
69
|
+
|
70
|
+
self.logger.info("🏗️ 开始构建类型索引...")
|
71
|
+
|
72
|
+
# 扫描所有Java文件
|
73
|
+
java_files = list(self.sources_dir.rglob("*.java"))
|
74
|
+
self.logger.info(f"📁 发现 {len(java_files)} 个Java文件")
|
75
|
+
|
76
|
+
for java_file in java_files:
|
77
|
+
self._index_single_file(java_file)
|
78
|
+
|
79
|
+
self._stats['total_classes'] = len(self._exact_index)
|
80
|
+
self._is_built = True
|
81
|
+
|
82
|
+
self.logger.info(f"✅ 索引构建完成: {self._stats['total_classes']} 个类")
|
83
|
+
self._print_index_stats()
|
84
|
+
|
85
|
+
def _index_single_file(self, java_file: Path) -> None:
|
86
|
+
"""
|
87
|
+
为单个Java文件建立索引
|
88
|
+
|
89
|
+
Args:
|
90
|
+
java_file: Java文件路径
|
91
|
+
"""
|
92
|
+
try:
|
93
|
+
# 获取类名和包名
|
94
|
+
class_name = java_file.stem
|
95
|
+
relative_path = java_file.relative_to(self.sources_dir)
|
96
|
+
package_parts = relative_path.parts[:-1] # 排除文件名
|
97
|
+
|
98
|
+
if package_parts:
|
99
|
+
package_name = '.'.join(package_parts)
|
100
|
+
full_class_name = f"{package_name}.{class_name}"
|
101
|
+
else:
|
102
|
+
package_name = ""
|
103
|
+
full_class_name = class_name
|
104
|
+
|
105
|
+
# 1. 精确索引:完整类名
|
106
|
+
self._exact_index[full_class_name] = full_class_name
|
107
|
+
|
108
|
+
# 2. 简单名索引
|
109
|
+
if class_name not in self._simple_index:
|
110
|
+
self._simple_index[class_name] = []
|
111
|
+
self._simple_index[class_name].append(full_class_name)
|
112
|
+
|
113
|
+
# 3. 后缀索引(用于匹配如 IdData -> ContactIdData)
|
114
|
+
if len(class_name) > 4:
|
115
|
+
for suffix_len in [4, 6, 8]: # 多种后缀长度
|
116
|
+
if len(class_name) >= suffix_len:
|
117
|
+
suffix = class_name[-suffix_len:]
|
118
|
+
if suffix not in self._suffix_index:
|
119
|
+
self._suffix_index[suffix] = []
|
120
|
+
self._suffix_index[suffix].append(full_class_name)
|
121
|
+
|
122
|
+
# 4. 包名索引
|
123
|
+
if package_name:
|
124
|
+
if package_name not in self._package_index:
|
125
|
+
self._package_index[package_name] = []
|
126
|
+
self._package_index[package_name].append(full_class_name)
|
127
|
+
|
128
|
+
except Exception as e:
|
129
|
+
self.logger.warning(f"⚠️ 索引文件失败 {java_file}: {e}")
|
130
|
+
|
131
|
+
def find_best_match(self, type_name: str, current_package: str = "") -> Optional[str]:
|
132
|
+
"""
|
133
|
+
查找类型名的最佳匹配
|
134
|
+
|
135
|
+
Args:
|
136
|
+
type_name: 要查找的类型名
|
137
|
+
current_package: 当前包名(用于相似度计算)
|
138
|
+
|
139
|
+
Returns:
|
140
|
+
最佳匹配的完整类名,如果没有找到则返回None
|
141
|
+
"""
|
142
|
+
if not self._is_built:
|
143
|
+
self.build_index()
|
144
|
+
|
145
|
+
# 快速过滤基础类型
|
146
|
+
if type_name in self._basic_types:
|
147
|
+
self._stats['basic_type_skips'] += 1
|
148
|
+
return None
|
149
|
+
|
150
|
+
with self._lock:
|
151
|
+
# 1. 精确匹配
|
152
|
+
if type_name in self._exact_index:
|
153
|
+
self._stats['index_hits'] += 1
|
154
|
+
return self._exact_index[type_name]
|
155
|
+
|
156
|
+
# 2. 简单名匹配
|
157
|
+
if type_name in self._simple_index:
|
158
|
+
candidates = self._simple_index[type_name]
|
159
|
+
if len(candidates) == 1:
|
160
|
+
self._stats['index_hits'] += 1
|
161
|
+
return candidates[0]
|
162
|
+
else:
|
163
|
+
# 多个候选,选择包名最相似的
|
164
|
+
best_match = self._select_best_by_package(candidates, current_package)
|
165
|
+
if best_match:
|
166
|
+
self._stats['index_hits'] += 1
|
167
|
+
return best_match
|
168
|
+
|
169
|
+
# 3. 后缀匹配
|
170
|
+
for suffix_len in [4, 6, 8]:
|
171
|
+
if len(type_name) >= suffix_len:
|
172
|
+
suffix = type_name[-suffix_len:]
|
173
|
+
if suffix in self._suffix_index:
|
174
|
+
candidates = self._suffix_index[suffix]
|
175
|
+
# 过滤:确保候选类名以type_name结尾
|
176
|
+
filtered_candidates = [
|
177
|
+
c for c in candidates
|
178
|
+
if c.split('.')[-1].endswith(type_name)
|
179
|
+
]
|
180
|
+
if filtered_candidates:
|
181
|
+
best_match = self._select_best_by_package(filtered_candidates, current_package)
|
182
|
+
if best_match:
|
183
|
+
self._stats['index_hits'] += 1
|
184
|
+
return best_match
|
185
|
+
|
186
|
+
# 4. 未找到匹配
|
187
|
+
self._stats['index_misses'] += 1
|
188
|
+
return None
|
189
|
+
|
190
|
+
def _select_best_by_package(self, candidates: List[str], current_package: str) -> Optional[str]:
|
191
|
+
"""
|
192
|
+
根据包名相似度选择最佳候选
|
193
|
+
|
194
|
+
Args:
|
195
|
+
candidates: 候选类名列表
|
196
|
+
current_package: 当前包名
|
197
|
+
|
198
|
+
Returns:
|
199
|
+
最佳匹配的类名
|
200
|
+
"""
|
201
|
+
if not candidates:
|
202
|
+
return None
|
203
|
+
|
204
|
+
if len(candidates) == 1:
|
205
|
+
return candidates[0]
|
206
|
+
|
207
|
+
if not current_package:
|
208
|
+
return candidates[0] # 无包名信息时返回第一个
|
209
|
+
|
210
|
+
# 计算包名相似度
|
211
|
+
best_candidate = None
|
212
|
+
best_similarity = -1
|
213
|
+
|
214
|
+
for candidate in candidates:
|
215
|
+
candidate_package = '.'.join(candidate.split('.')[:-1])
|
216
|
+
similarity = self._calculate_package_similarity(candidate_package, current_package)
|
217
|
+
|
218
|
+
if similarity > best_similarity:
|
219
|
+
best_similarity = similarity
|
220
|
+
best_candidate = candidate
|
221
|
+
|
222
|
+
return best_candidate
|
223
|
+
|
224
|
+
def _calculate_package_similarity(self, package1: str, package2: str) -> float:
|
225
|
+
"""
|
226
|
+
计算两个包名的相似度
|
227
|
+
|
228
|
+
Args:
|
229
|
+
package1: 第一个包名
|
230
|
+
package2: 第二个包名
|
231
|
+
|
232
|
+
Returns:
|
233
|
+
相似度分数(0-1)
|
234
|
+
"""
|
235
|
+
if not package1 or not package2:
|
236
|
+
return 0.0
|
237
|
+
|
238
|
+
parts1 = package1.split('.')
|
239
|
+
parts2 = package2.split('.')
|
240
|
+
|
241
|
+
# 计算公共前缀长度
|
242
|
+
common_prefix = 0
|
243
|
+
for i in range(min(len(parts1), len(parts2))):
|
244
|
+
if parts1[i] == parts2[i]:
|
245
|
+
common_prefix += 1
|
246
|
+
else:
|
247
|
+
break
|
248
|
+
|
249
|
+
# 相似度 = 公共前缀长度 / 最大包深度
|
250
|
+
max_depth = max(len(parts1), len(parts2))
|
251
|
+
return common_prefix / max_depth if max_depth > 0 else 0.0
|
252
|
+
|
253
|
+
def get_classes_in_package(self, package_name: str) -> List[str]:
    """
    Return all classes recorded for the given package.

    The index is built first if that has not happened yet.

    Args:
        package_name: Package name to look up.

    Returns:
        A new list of class names for that package; empty when the package
        is not in the index. Changing the returned list does not affect the
        index.
    """
    if not self._is_built:
        self.build_index()

    # Return a copy: handing out the stored list would let a caller's
    # append/remove silently corrupt the package index.
    return list(self._package_index.get(package_name, ()))
|
267
|
+
|
268
|
+
def get_stats(self) -> dict:
    """
    Collect the index usage counters into a dictionary.

    Returns:
        A dict with the class count, the number of counted requests
        (hits + misses), hits, misses, the hit rate in percent (0 when no
        request has been counted), the basic-type skip count and the
        built flag.
    """
    with self._lock:
        counters = self._stats
        hits = counters['index_hits']
        misses = counters['index_misses']
        total_requests = hits + misses

        # Guard the division: no requests means there is no rate to compute.
        if total_requests > 0:
            hit_rate = hits / total_requests * 100
        else:
            hit_rate = 0

        return {
            'total_classes': counters['total_classes'],
            'total_requests': total_requests,
            'index_hits': hits,
            'index_misses': misses,
            'hit_rate_percent': hit_rate,
            'basic_type_skips': counters['basic_type_skips'],
            'is_built': self._is_built,
        }
|
288
|
+
|
289
|
+
def _print_index_stats(self):
|
290
|
+
"""打印索引构建统计"""
|
291
|
+
self.logger.info("📊 索引统计:")
|
292
|
+
self.logger.info(f" 精确索引: {len(self._exact_index)} 个类")
|
293
|
+
self.logger.info(f" 简单名索引: {len(self._simple_index)} 个条目")
|
294
|
+
self.logger.info(f" 后缀索引: {len(self._suffix_index)} 个条目")
|
295
|
+
self.logger.info(f" 包名索引: {len(self._package_index)} 个包")
|
296
|
+
|
297
|
+
def print_stats(self):
    """Log the usage statistics returned by ``get_stats`` via ``self.logger.info``."""
    snapshot = self.get_stats()
    emit = self.logger.info

    emit("📊 类型索引统计:")
    emit(f" 总类数: {snapshot['total_classes']}")
    emit(f" 查询请求: {snapshot['total_requests']}")
    emit(f" 索引命中: {snapshot['index_hits']}")
    emit(f" 索引未命中: {snapshot['index_misses']}")
    # The hit rate is rendered with one decimal place.
    emit(f" 命中率: {snapshot['hit_rate_percent']:.1f}%")
    emit(f" 基础类型跳过: {snapshot['basic_type_skips']}")
|
308
|
+
|
309
|
+
|
310
|
+
# Global index instance: starts as None, is assigned by get_type_index()
# and reset to None by clear_global_index().
_global_index = None
# Lock held by get_type_index() while it creates the global instance.
_index_lock = threading.Lock()
|
313
|
+
|
314
|
+
|
315
|
+
def get_type_index(sources_dir: Optional[Path] = None) -> TypeMatchingIndex:
    """
    Return the global type index instance, creating it on first use.

    Args:
        sources_dir: Source directory; only required on the call that
            creates the instance. It is not consulted once an instance
            exists.

    Returns:
        The global TypeMatchingIndex instance, with build_index() already
        called on it.

    Raises:
        ValueError: If no instance exists yet and ``sources_dir`` is None.
    """
    global _global_index

    # Work on a local reference so the value returned is the one that was
    # checked, even if the global is reset in between.
    index = _global_index
    if index is None:
        with _index_lock:
            # Re-check under the lock: another thread may have created the
            # instance while this one was waiting.
            index = _global_index
            if index is None:
                if sources_dir is None:
                    raise ValueError("首次调用 get_type_index 时必须提供 sources_dir 参数")
                # Build before publishing. Assigning the global first would
                # let lock-free readers receive an index that is not built
                # yet, and a failing build_index() would leave that
                # half-initialised instance installed as the singleton.
                index = TypeMatchingIndex(sources_dir)
                index.build_index()
                _global_index = index

    return index
|
336
|
+
|
337
|
+
|
338
|
+
def clear_global_index():
    """
    Clear the global index so the next get_type_index() call creates a new one.

    The reset is done while holding ``_index_lock``, the same lock
    get_type_index() holds while creating the instance, so a clear cannot
    interleave with an in-progress creation.
    """
    global _global_index
    with _index_lock:
        _global_index = None
|
File without changes
|
File without changes
|