reproto 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .git/COMMIT_EDITMSG +1 -1
- .git/index +0 -0
- .git/logs/HEAD +3 -0
- .git/logs/refs/heads/iyue +3 -0
- .git/logs/refs/remotes/gitlab/iyue +3 -0
- .git/logs/refs/remotes/origin/iyue +3 -0
- .git/objects/15/eb3f02479e633439ec83c143e703f8448043a1 +0 -0
- .git/objects/20/cf56ec106bcd66420dd000279f983571b918b6 +0 -0
- .git/objects/21/55b64d52922c88527c102d62f23e5c2abbae79 +0 -0
- .git/objects/26/1f67f3b731b32f6d77de9dd7be2d61e2a14ace +0 -0
- .git/objects/2e/2c1c42f5ac5d665cc672d3792078b756d9ab0e +0 -0
- .git/objects/33/52dfa8f5d9eb46cc98ea7ccecf02e4d9df95f7 +0 -0
- .git/objects/35/8bace20b731ff1bbb256d2a0158dfc84720978 +0 -0
- .git/objects/3c/6f0120229cc2cd8123efbeb7f186eb0a485f29 +0 -0
- .git/objects/4d/6d457bfabc4af842e5ddc2d56eb059d5dfdc9d +0 -0
- .git/objects/55/6723fdd4f525eed41c52fa80defca3f0c81c47 +0 -0
- .git/objects/65/a4f0ada7519f8b1e6a7c7e287541b8effde9fd +0 -0
- .git/objects/76/311aa8e59d780763e0d66787067cc5d9613a67 +0 -0
- .git/objects/8c/809c42c7ae13007fd885ee7bcffae7acf2c520 +0 -0
- .git/objects/8d/44142ae2d6dbb59d4ebed8587bccd051e5766b +0 -0
- .git/objects/8d/4a5767bef0c342f1660526f9671c0944922c40 +0 -0
- .git/objects/95/295a15779ebefd563ec777c3d3cced7e8d0209 +0 -0
- .git/objects/97/56fe0931216a7c40cbf250e1ab8a6dfd589f13 +0 -0
- .git/objects/9a/e313cdf64cd82416c1238eb493e6396f799f12 +0 -0
- .git/objects/cd/2d6c229438c6b1c694b9392a85888d89ef49c1 +0 -0
- .git/objects/db/beedb30613f79ae3ff67df1428cf8ade223711 +0 -0
- .git/objects/e8/1433b6ad92206cdadbee1f474b4f99383314cb +0 -0
- .git/objects/e9/a15996cb55ac72aeb6611d26e8d22246589943 +0 -0
- .git/objects/f7/25a430eb3364460ba854dbc8809edc21dc6c70 +0 -0
- .git/objects/fc/e15b9dbffd9f37b1f2d46944ee2d0394df6565 +2 -0
- .git/refs/heads/iyue +1 -1
- .git/refs/remotes/gitlab/iyue +1 -1
- .git/refs/remotes/origin/iyue +1 -1
- README.md +36 -116
- core/info_decoder.py +512 -105
- core/reconstructor.py +645 -84
- generation/proto_generator.py +38 -12
- main.py +36 -5
- parsing/java_parser.py +81 -1
- pyproject.toml +13 -2
- {reproto-0.0.6.dist-info → reproto-0.0.8.dist-info}/METADATA +46 -119
- {reproto-0.0.6.dist-info → reproto-0.0.8.dist-info}/RECORD +46 -20
- utils/file_cache.py +165 -0
- utils/type_index.py +341 -0
- {reproto-0.0.6.dist-info → reproto-0.0.8.dist-info}/WHEEL +0 -0
- {reproto-0.0.6.dist-info → reproto-0.0.8.dist-info}/entry_points.txt +0 -0
core/reconstructor.py
CHANGED
@@ -31,6 +31,9 @@ class JavaSourceAnalyzer:
|
|
31
31
|
self._current_class_name = None
|
32
32
|
# 初始化JavaParser用于字段类型解析
|
33
33
|
self.java_parser = JavaParser()
|
34
|
+
# 使用文件缓存系统优化I/O性能
|
35
|
+
from utils.file_cache import get_file_cache
|
36
|
+
self.file_cache = get_file_cache()
|
34
37
|
|
35
38
|
def set_current_class(self, class_name: str):
|
36
39
|
"""设置当前分析的类"""
|
@@ -65,7 +68,7 @@ class JavaSourceAnalyzer:
|
|
65
68
|
从Java源码中获取字段的真实类型
|
66
69
|
|
67
70
|
Args:
|
68
|
-
field_name_raw: 原始字段名(如
|
71
|
+
field_name_raw: 原始字段名(如 contacts_)
|
69
72
|
expected_type: 期望的基础类型(message、enum 或 map)
|
70
73
|
|
71
74
|
Returns:
|
@@ -77,40 +80,74 @@ class JavaSourceAnalyzer:
|
|
77
80
|
# 清理字段名
|
78
81
|
field_name = field_name_raw.rstrip('_')
|
79
82
|
|
80
|
-
#
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
#
|
111
|
-
|
112
|
-
|
113
|
-
|
83
|
+
# 查找字段声明模式,支持多种声明格式
|
84
|
+
patterns = [
|
85
|
+
# Internal.ProtobufList<Contact> contacts_ = ...
|
86
|
+
rf'private\s+Internal\.ProtobufList<([^>]+)>\s+{re.escape(field_name)}_\s*=',
|
87
|
+
# MapFieldLite<String, Contact> contacts_ = ...
|
88
|
+
rf'private\s+MapFieldLite<([^,]+),\s*([^>]+)>\s+{re.escape(field_name)}_\s*=',
|
89
|
+
# List<Contact> contacts_ = ...
|
90
|
+
rf'private\s+List<([^>]+)>\s+{re.escape(field_name)}_\s*=',
|
91
|
+
# Internal.IntList badges_ = ... (用于枚举列表)
|
92
|
+
rf'private\s+(Internal\.IntList)\s+{re.escape(field_name)}_\s*=',
|
93
|
+
# 普通字段声明: private Contact contact_ = ...
|
94
|
+
rf'private\s+(\w+(?:\.\w+)*)\s+{re.escape(field_name)}_\s*=',
|
95
|
+
# 简单字段声明: private Contact contact_;
|
96
|
+
rf'private\s+(\w+(?:\.\w+)*)\s+{re.escape(field_name)}_\s*;'
|
97
|
+
]
|
98
|
+
|
99
|
+
for i, pattern in enumerate(patterns):
|
100
|
+
matches = re.findall(pattern, self._current_class_content)
|
101
|
+
if matches:
|
102
|
+
if i == 0: # Internal.ProtobufList<Contact>
|
103
|
+
element_type = matches[0]
|
104
|
+
return f"Internal.ProtobufList<{element_type}>"
|
105
|
+
elif i == 1: # MapFieldLite<String, Contact>
|
106
|
+
key_type, value_type = matches[0]
|
107
|
+
return f"MapFieldLite<{key_type.strip()}, {value_type.strip()}>"
|
108
|
+
elif i == 2: # List<Contact>
|
109
|
+
element_type = matches[0]
|
110
|
+
return f"List<{element_type}>"
|
111
|
+
elif i == 3: # Internal.IntList
|
112
|
+
return "Internal.IntList"
|
113
|
+
else: # 普通类型
|
114
|
+
simple_type = matches[0]
|
115
|
+
|
116
|
+
# 检查是否为Java基础类型,如果是则直接返回
|
117
|
+
basic_java_types = {
|
118
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
119
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
120
|
+
'Byte', 'Short', 'Character'
|
121
|
+
}
|
122
|
+
|
123
|
+
if simple_type in basic_java_types:
|
124
|
+
return simple_type # 直接返回基础类型,不添加包名
|
125
|
+
|
126
|
+
# 如果字段声明是基础类型(如int),但期望类型是enum,尝试从setter方法获取真实类型
|
127
|
+
if expected_type == 'enum' and simple_type in ['int', 'long', 'short', 'byte']:
|
128
|
+
setter_type = self._get_type_from_setter(field_name)
|
129
|
+
if setter_type:
|
130
|
+
return setter_type
|
131
|
+
continue
|
132
|
+
|
133
|
+
# 特殊处理:Internal.IntList可能对应枚举列表
|
134
|
+
if simple_type == 'Internal.IntList':
|
135
|
+
# 检查是否有对应的枚举setter方法
|
136
|
+
enum_type = self._get_enum_type_from_list_setter(field_name)
|
137
|
+
if enum_type:
|
138
|
+
return f"Internal.ProtobufList<{enum_type}>"
|
139
|
+
|
140
|
+
# 查找import语句获取完整类名
|
141
|
+
import_pattern = rf'import\s+([^;]*\.{re.escape(simple_type)});'
|
142
|
+
import_matches = re.findall(import_pattern, self._current_class_content)
|
143
|
+
|
144
|
+
if import_matches:
|
145
|
+
return import_matches[0] # 返回完整的包名.类名
|
146
|
+
else:
|
147
|
+
# 如果没有import,假设在同一个包中
|
148
|
+
if self._current_class_name:
|
149
|
+
package_name = '.'.join(self._current_class_name.split('.')[:-1])
|
150
|
+
return f"{package_name}.{simple_type}"
|
114
151
|
|
115
152
|
return None
|
116
153
|
|
@@ -151,24 +188,38 @@ class JavaSourceAnalyzer:
|
|
151
188
|
Returns:
|
152
189
|
protobuf类型名
|
153
190
|
"""
|
191
|
+
if not java_type:
|
192
|
+
return 'string'
|
193
|
+
|
154
194
|
# 基础类型映射
|
155
195
|
basic_types = {
|
196
|
+
'int': 'int32',
|
197
|
+
'long': 'int64',
|
198
|
+
'float': 'float',
|
199
|
+
'double': 'double',
|
200
|
+
'boolean': 'bool',
|
156
201
|
'String': 'string',
|
157
|
-
'
|
158
|
-
'
|
159
|
-
'
|
160
|
-
'Float': 'float',
|
161
|
-
'Double': 'double',
|
162
|
-
'
|
202
|
+
'java.lang.String': 'string',
|
203
|
+
'java.lang.Integer': 'int32',
|
204
|
+
'java.lang.Long': 'int64',
|
205
|
+
'java.lang.Float': 'float',
|
206
|
+
'java.lang.Double': 'double',
|
207
|
+
'java.lang.Boolean': 'bool',
|
208
|
+
'byte[]': 'bytes',
|
209
|
+
'ByteString': 'bytes',
|
210
|
+
'com.google.protobuf.ByteString': 'bytes',
|
163
211
|
}
|
164
212
|
|
213
|
+
# 检查是否为基础类型
|
165
214
|
if java_type in basic_types:
|
166
215
|
return basic_types[java_type]
|
167
216
|
|
168
|
-
#
|
217
|
+
# 如果是完整的类名,提取简单类名
|
169
218
|
if '.' in java_type:
|
170
|
-
|
219
|
+
simple_name = java_type.split('.')[-1]
|
220
|
+
return simple_name
|
171
221
|
|
222
|
+
# 默认返回原类型名
|
172
223
|
return java_type
|
173
224
|
|
174
225
|
def _get_type_from_setter(self, field_name: str) -> Optional[str]:
|
@@ -184,8 +235,49 @@ class JavaSourceAnalyzer:
|
|
184
235
|
# 将字段名转换为setter方法名
|
185
236
|
setter_name = f"set{field_name[0].upper()}{field_name[1:]}"
|
186
237
|
|
187
|
-
#
|
188
|
-
|
238
|
+
# 查找私有setter方法:/* JADX INFO: Access modifiers changed from: private */
|
239
|
+
# public void setSpamType(SpamType spamType)
|
240
|
+
patterns = [
|
241
|
+
# 查找setter方法签名,支持public或private
|
242
|
+
rf'(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*(\w+)\s+\w+\s*\)',
|
243
|
+
# 也支持注释中的private标记
|
244
|
+
rf'\/\*[^*]*private[^*]*\*\/\s*(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*(\w+)\s+\w+\s*\)'
|
245
|
+
]
|
246
|
+
|
247
|
+
for pattern in patterns:
|
248
|
+
matches = re.findall(pattern, self._current_class_content, re.DOTALL)
|
249
|
+
if matches:
|
250
|
+
simple_type = matches[0]
|
251
|
+
|
252
|
+
# 查找import语句获取完整类名
|
253
|
+
import_pattern = rf'import\s+([^;]*\.{re.escape(simple_type)});'
|
254
|
+
import_matches = re.findall(import_pattern, self._current_class_content)
|
255
|
+
|
256
|
+
if import_matches:
|
257
|
+
return import_matches[0]
|
258
|
+
else:
|
259
|
+
# 如果没有import,假设在同一个包中
|
260
|
+
if self._current_class_name:
|
261
|
+
package_name = '.'.join(self._current_class_name.split('.')[:-1])
|
262
|
+
return f"{package_name}.{simple_type}"
|
263
|
+
|
264
|
+
return None
|
265
|
+
|
266
|
+
def _get_enum_type_from_list_setter(self, field_name: str) -> Optional[str]:
|
267
|
+
"""
|
268
|
+
从列表setter方法中获取枚举类型(如setBadges(int i10, Badge badge))
|
269
|
+
|
270
|
+
Args:
|
271
|
+
field_name: 字段名(如 badges)
|
272
|
+
|
273
|
+
Returns:
|
274
|
+
枚举类型名
|
275
|
+
"""
|
276
|
+
# 将字段名转换为setter方法名
|
277
|
+
setter_name = f"set{field_name[0].upper()}{field_name[1:]}"
|
278
|
+
|
279
|
+
# 查找列表setter方法:setBadges(int i10, Badge badge)
|
280
|
+
pattern = rf'(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*int\s+\w+,\s*(\w+)\s+\w+\s*\)'
|
189
281
|
matches = re.findall(pattern, self._current_class_content)
|
190
282
|
|
191
283
|
if matches:
|
@@ -206,19 +298,21 @@ class JavaSourceAnalyzer:
|
|
206
298
|
return None
|
207
299
|
|
208
300
|
def _load_class_content(self, class_name: str) -> Optional[str]:
|
209
|
-
"""
|
301
|
+
"""加载类的源码内容(使用缓存优化)"""
|
210
302
|
try:
|
211
303
|
# 标准路径:com.example.Model -> com/example/Model.java
|
212
304
|
file_path = class_name.replace('.', '/') + '.java'
|
213
305
|
full_path = self.sources_dir / file_path
|
214
306
|
|
215
|
-
|
216
|
-
|
307
|
+
# 使用缓存系统获取文件内容
|
308
|
+
content = self.file_cache.get_content(full_path)
|
309
|
+
if content:
|
310
|
+
return content
|
217
311
|
|
218
312
|
# 备选方案:按简单类名搜索
|
219
313
|
simple_name = class_name.split('.')[-1]
|
220
314
|
for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
|
221
|
-
return
|
315
|
+
return self.file_cache.get_content(java_file)
|
222
316
|
|
223
317
|
return None
|
224
318
|
except Exception:
|
@@ -249,38 +343,64 @@ class ProtoReconstructor:
|
|
249
343
|
|
250
344
|
# 初始化核心组件
|
251
345
|
self.java_parser = JavaParser() # Java文件解析器
|
252
|
-
#
|
253
|
-
self.
|
254
|
-
self.info_decoder = InfoDecoder(self.java_source_analyzer) # 字节码解码器
|
346
|
+
self.enum_parser = EnumParser(str(sources_dir)) # 枚举解析器需要字符串路径
|
347
|
+
self.info_decoder = InfoDecoder()
|
255
348
|
self.proto_generator = ProtoGenerator() # Proto文件生成器
|
256
349
|
|
350
|
+
# 初始化Java源码分析器
|
351
|
+
self.java_source_analyzer = JavaSourceAnalyzer(sources_dir)
|
352
|
+
self.info_decoder.java_source_analyzer = self.java_source_analyzer
|
353
|
+
|
354
|
+
# 初始化类型索引(延迟加载)
|
355
|
+
from utils.type_index import get_type_index
|
356
|
+
self.type_index = get_type_index(sources_dir)
|
357
|
+
|
257
358
|
# 任务调度状态
|
258
359
|
self.processed_classes: Set[str] = set() # 已处理的类
|
259
360
|
self.pending_classes: deque = deque() # 待处理的类队列
|
260
361
|
self.message_definitions: Dict[str, MessageDefinition] = {} # 消息定义
|
261
362
|
self.enum_definitions: Dict[str, EnumDefinition] = {} # 枚举定义
|
262
363
|
|
364
|
+
# 错误和状态跟踪
|
365
|
+
self.failed_classes: Dict[str, str] = {} # 失败的类 -> 失败原因
|
366
|
+
self.skipped_classes: Dict[str, str] = {} # 跳过的类 -> 跳过原因
|
367
|
+
|
368
|
+
# 当前处理的类名(用于调试)
|
369
|
+
self._current_processing_class = None
|
370
|
+
|
263
371
|
def reconstruct_from_root(self, root_class: str) -> Dict[str, any]:
|
264
372
|
"""
|
265
|
-
|
373
|
+
从根类开始重构protobuf定义
|
266
374
|
|
267
375
|
Args:
|
268
|
-
root_class:
|
376
|
+
root_class: 根类的完整名称
|
269
377
|
|
270
378
|
Returns:
|
271
|
-
|
379
|
+
包含统计信息的字典
|
272
380
|
"""
|
273
|
-
self.logger.info(f"开始重构,根类: {root_class}")
|
381
|
+
self.logger.info(f"🚀 开始重构,根类: {root_class}")
|
274
382
|
|
275
|
-
#
|
383
|
+
# 1. 添加根类到处理队列
|
276
384
|
self.pending_classes.append(root_class)
|
277
385
|
|
278
|
-
#
|
386
|
+
# 2. 处理所有消息类
|
279
387
|
self._process_all_classes()
|
280
388
|
|
281
|
-
|
389
|
+
# 3. 解析所有枚举类
|
390
|
+
self._process_all_enums()
|
391
|
+
|
392
|
+
# 4. 生成proto文件
|
282
393
|
self._generate_all_proto_files()
|
283
394
|
|
395
|
+
# 5. 输出性能统计信息
|
396
|
+
from utils.file_cache import get_file_cache
|
397
|
+
file_cache = get_file_cache()
|
398
|
+
file_cache.print_stats()
|
399
|
+
|
400
|
+
# 输出类型索引统计
|
401
|
+
self.type_index.print_stats()
|
402
|
+
|
403
|
+
# 6. 返回统计信息
|
284
404
|
# 报告未知类型统计
|
285
405
|
self._report_unknown_types()
|
286
406
|
|
@@ -304,6 +424,35 @@ class ProtoReconstructor:
|
|
304
424
|
self.logger.info(f"处理类: {class_name}")
|
305
425
|
self._process_single_class(class_name)
|
306
426
|
|
427
|
+
def _process_all_enums(self) -> None:
|
428
|
+
"""解析目标包下的所有枚举类"""
|
429
|
+
self.logger.info("🔢 开始解析枚举类...")
|
430
|
+
|
431
|
+
# 从已处理的类中推断目标包名
|
432
|
+
target_package = None
|
433
|
+
if self.message_definitions:
|
434
|
+
# 取第一个消息定义的包名
|
435
|
+
first_message = next(iter(self.message_definitions.values()))
|
436
|
+
target_package = first_message.package_name
|
437
|
+
elif self.processed_classes:
|
438
|
+
# 从已处理的类名中推断包名
|
439
|
+
first_class = next(iter(self.processed_classes))
|
440
|
+
target_package = '.'.join(first_class.split('.')[:-1])
|
441
|
+
|
442
|
+
if not target_package:
|
443
|
+
self.logger.warning("⚠️ 无法推断目标包名,跳过枚举解析")
|
444
|
+
return
|
445
|
+
|
446
|
+
# 解析目标包下的所有枚举
|
447
|
+
enum_definitions = self.enum_parser.parse_all_enums(target_package)
|
448
|
+
|
449
|
+
# 存储枚举定义
|
450
|
+
for enum_def in enum_definitions:
|
451
|
+
self.enum_definitions[enum_def.full_name] = enum_def
|
452
|
+
self.logger.info(f" ✅ 解析枚举: {enum_def.name} ({len(enum_def.values)} 个值)")
|
453
|
+
|
454
|
+
self.logger.info(f"📊 枚举解析完成,共解析 {len(enum_definitions)} 个枚举")
|
455
|
+
|
307
456
|
def _process_single_class(self, class_name: str) -> None:
|
308
457
|
"""
|
309
458
|
处理单个Java类
|
@@ -317,10 +466,19 @@ class ProtoReconstructor:
|
|
317
466
|
self.java_source_analyzer.set_current_class(class_name)
|
318
467
|
|
319
468
|
try:
|
469
|
+
# 检查是否应该跳过这个类
|
470
|
+
if self._should_skip_class(class_name):
|
471
|
+
skip_reason = self._get_skip_reason(class_name)
|
472
|
+
self.skipped_classes[class_name] = skip_reason
|
473
|
+
self.logger.info(f" ⏭️ 跳过类: {class_name} ({skip_reason})")
|
474
|
+
return
|
475
|
+
|
320
476
|
# 1. 查找Java文件
|
321
477
|
java_file_path = self._find_java_file(class_name)
|
322
478
|
if not java_file_path:
|
323
|
-
|
479
|
+
error_msg = "找不到对应的Java文件"
|
480
|
+
self.failed_classes[class_name] = error_msg
|
481
|
+
self.logger.warning(f" ❌ {error_msg}: {class_name}")
|
324
482
|
return
|
325
483
|
|
326
484
|
# 2. 尝试解析为枚举
|
@@ -335,12 +493,14 @@ class ProtoReconstructor:
|
|
335
493
|
# 3. 尝试解析为消息类
|
336
494
|
info_string, objects_array = self.java_parser.parse_java_file(java_file_path)
|
337
495
|
if not info_string:
|
338
|
-
|
496
|
+
error_msg = "无法从Java文件中提取protobuf信息"
|
497
|
+
self.failed_classes[class_name] = error_msg
|
498
|
+
self.logger.warning(f" ❌ {error_msg}: {class_name}")
|
339
499
|
return
|
340
500
|
|
341
501
|
# 4. 解码字节码为消息定义
|
342
502
|
message_def = self.info_decoder.decode_message_info(
|
343
|
-
class_name, info_string, objects_array
|
503
|
+
class_name, info_string, objects_array, java_file_path
|
344
504
|
)
|
345
505
|
|
346
506
|
if message_def:
|
@@ -350,10 +510,16 @@ class ProtoReconstructor:
|
|
350
510
|
# 5. 发现并添加依赖类到队列
|
351
511
|
self._discover_dependencies(message_def)
|
352
512
|
else:
|
353
|
-
|
513
|
+
error_msg = "字节码解码失败,可能不是protobuf消息类"
|
514
|
+
self.failed_classes[class_name] = error_msg
|
515
|
+
self.logger.warning(f" ❌ {error_msg}: {class_name}")
|
354
516
|
|
355
517
|
except Exception as e:
|
356
|
-
|
518
|
+
error_msg = f"处理异常: {str(e)}"
|
519
|
+
self.failed_classes[class_name] = error_msg
|
520
|
+
self.logger.error(f" ❌ {error_msg}: {class_name}")
|
521
|
+
if hasattr(self, '_verbose') and self._verbose:
|
522
|
+
self.logger.exception(f"详细异常信息 ({class_name}):")
|
357
523
|
finally:
|
358
524
|
# 无论成功失败都标记为已处理,避免无限循环
|
359
525
|
self.processed_classes.add(class_name)
|
@@ -387,19 +553,202 @@ class ProtoReconstructor:
|
|
387
553
|
|
388
554
|
# 从常规字段提取依赖
|
389
555
|
for field in message_def.fields:
|
390
|
-
|
391
|
-
|
392
|
-
dependencies.append(dep)
|
556
|
+
deps = self._extract_field_dependencies(field.type_name, message_def.package_name)
|
557
|
+
dependencies.extend(deps)
|
393
558
|
|
394
559
|
# 从oneof字段提取依赖
|
395
560
|
for oneof in message_def.oneofs:
|
396
561
|
for field in oneof.fields:
|
397
|
-
|
398
|
-
|
399
|
-
|
562
|
+
deps = self._extract_field_dependencies(field.type_name, message_def.package_name)
|
563
|
+
dependencies.extend(deps)
|
564
|
+
|
565
|
+
# 去重
|
566
|
+
return list(set(dependencies))
|
567
|
+
|
568
|
+
def _extract_field_dependencies(self, type_name: str, current_package: str) -> List[str]:
|
569
|
+
"""
|
570
|
+
从字段类型中提取所有依赖(包括map类型的键值类型)
|
571
|
+
|
572
|
+
Args:
|
573
|
+
type_name: 字段类型名
|
574
|
+
current_package: 当前包名
|
575
|
+
|
576
|
+
Returns:
|
577
|
+
依赖类名列表
|
578
|
+
"""
|
579
|
+
dependencies = []
|
580
|
+
|
581
|
+
if not type_name:
|
582
|
+
return dependencies
|
583
|
+
|
584
|
+
# 处理map类型: map<string, Contact> -> [Contact]
|
585
|
+
if type_name.startswith('map<') and type_name.endswith('>'):
|
586
|
+
map_content = type_name[4:-1] # 移除 'map<' 和 '>'
|
587
|
+
# 分割键值类型,处理嵌套的尖括号
|
588
|
+
key_type, value_type = self._parse_map_types(map_content)
|
589
|
+
|
590
|
+
# 递归处理键类型和值类型
|
591
|
+
dependencies.extend(self._extract_field_dependencies(key_type, current_package))
|
592
|
+
dependencies.extend(self._extract_field_dependencies(value_type, current_package))
|
593
|
+
|
594
|
+
# 处理普通类型
|
595
|
+
else:
|
596
|
+
dep = self._resolve_field_dependency(type_name, current_package)
|
597
|
+
if dep:
|
598
|
+
dependencies.append(dep)
|
400
599
|
|
401
600
|
return dependencies
|
402
601
|
|
602
|
+
def _parse_map_types(self, map_content: str) -> tuple:
|
603
|
+
"""
|
604
|
+
解析map类型的键值类型
|
605
|
+
|
606
|
+
Args:
|
607
|
+
map_content: map内容,如 "string, Contact" 或 "string, List<Contact>"
|
608
|
+
|
609
|
+
Returns:
|
610
|
+
(key_type, value_type) 元组
|
611
|
+
"""
|
612
|
+
# 简单情况:没有嵌套的尖括号
|
613
|
+
if '<' not in map_content:
|
614
|
+
parts = [part.strip() for part in map_content.split(',', 1)]
|
615
|
+
if len(parts) == 2:
|
616
|
+
return parts[0], parts[1]
|
617
|
+
|
618
|
+
# 复杂情况:处理嵌套的尖括号
|
619
|
+
bracket_count = 0
|
620
|
+
for i, char in enumerate(map_content):
|
621
|
+
if char == '<':
|
622
|
+
bracket_count += 1
|
623
|
+
elif char == '>':
|
624
|
+
bracket_count -= 1
|
625
|
+
elif char == ',' and bracket_count == 0:
|
626
|
+
# 找到分隔符
|
627
|
+
key_type = map_content[:i].strip()
|
628
|
+
value_type = map_content[i+1:].strip()
|
629
|
+
return key_type, value_type
|
630
|
+
|
631
|
+
# 如果解析失败,返回默认值
|
632
|
+
return 'string', 'string'
|
633
|
+
|
634
|
+
def _should_skip_class(self, class_name: str) -> bool:
|
635
|
+
"""
|
636
|
+
判断是否应该跳过某个类
|
637
|
+
|
638
|
+
Args:
|
639
|
+
class_name: 类名
|
640
|
+
|
641
|
+
Returns:
|
642
|
+
是否应该跳过
|
643
|
+
"""
|
644
|
+
# 跳过已经处理过的类
|
645
|
+
if class_name in self.processed_classes:
|
646
|
+
return True
|
647
|
+
|
648
|
+
# 跳过基础类型(包括Java基础类型和常见的系统类型)
|
649
|
+
basic_types = {
|
650
|
+
# Java基础类型
|
651
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
652
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
653
|
+
'Byte', 'Short', 'Character',
|
654
|
+
# Java系统类型
|
655
|
+
'java.lang.String', 'java.lang.Integer', 'java.lang.Long',
|
656
|
+
'java.lang.Boolean', 'java.lang.Float', 'java.lang.Double',
|
657
|
+
'java.lang.Object', 'java.util.List', 'java.util.Map',
|
658
|
+
'com.google.protobuf.ByteString', 'com.google.protobuf.MessageLite'
|
659
|
+
}
|
660
|
+
|
661
|
+
if class_name in basic_types:
|
662
|
+
return True
|
663
|
+
|
664
|
+
# 跳过明显的系统类型和内部类型
|
665
|
+
if self._is_system_or_internal_type(class_name):
|
666
|
+
return True
|
667
|
+
|
668
|
+
return False
|
669
|
+
|
670
|
+
def _is_system_or_internal_type(self, class_name: str) -> bool:
|
671
|
+
"""
|
672
|
+
判断是否为系统类型或内部类型
|
673
|
+
|
674
|
+
Args:
|
675
|
+
class_name: 类名
|
676
|
+
|
677
|
+
Returns:
|
678
|
+
是否为系统或内部类型
|
679
|
+
"""
|
680
|
+
# 跳过明显不是protobuf类的包
|
681
|
+
skip_packages = [
|
682
|
+
'java.', 'javax.', 'android.', 'androidx.',
|
683
|
+
'kotlin.', 'kotlinx.', 'com.google.common.',
|
684
|
+
'org.apache.', 'org.junit.', 'junit.',
|
685
|
+
'com.unity3d.', # 添加Unity3D包,避免误匹配
|
686
|
+
'Internal.' # 跳过Internal包下的类型
|
687
|
+
]
|
688
|
+
|
689
|
+
for skip_pkg in skip_packages:
|
690
|
+
if class_name.startswith(skip_pkg):
|
691
|
+
return True
|
692
|
+
|
693
|
+
# 跳过明显的内部类型
|
694
|
+
internal_patterns = [
|
695
|
+
'Internal.ProtobufList',
|
696
|
+
'MapFieldLite',
|
697
|
+
'GeneratedMessageLite',
|
698
|
+
'MessageLiteOrBuilder'
|
699
|
+
]
|
700
|
+
|
701
|
+
for pattern in internal_patterns:
|
702
|
+
if pattern in class_name:
|
703
|
+
return True
|
704
|
+
|
705
|
+
return False
|
706
|
+
|
707
|
+
def _get_skip_reason(self, class_name: str) -> str:
|
708
|
+
"""
|
709
|
+
获取跳过类的原因
|
710
|
+
|
711
|
+
Args:
|
712
|
+
class_name: 类名
|
713
|
+
|
714
|
+
Returns:
|
715
|
+
跳过原因
|
716
|
+
"""
|
717
|
+
# 基础类型
|
718
|
+
basic_types = {
|
719
|
+
'java.lang.String', 'java.lang.Integer', 'java.lang.Long',
|
720
|
+
'java.lang.Boolean', 'java.lang.Float', 'java.lang.Double',
|
721
|
+
'java.lang.Object', 'java.util.List', 'java.util.Map',
|
722
|
+
'com.google.protobuf.ByteString', 'com.google.protobuf.MessageLite'
|
723
|
+
}
|
724
|
+
|
725
|
+
if class_name in basic_types:
|
726
|
+
return "基础类型"
|
727
|
+
|
728
|
+
# 已处理
|
729
|
+
if class_name in self.processed_classes:
|
730
|
+
return "已处理"
|
731
|
+
|
732
|
+
# 系统包
|
733
|
+
system_packages = {
|
734
|
+
'java.': 'Java系统包',
|
735
|
+
'javax.': 'Java扩展包',
|
736
|
+
'android.': 'Android系统包',
|
737
|
+
'androidx.': 'AndroidX包',
|
738
|
+
'kotlin.': 'Kotlin标准库',
|
739
|
+
'kotlinx.': 'Kotlin扩展库',
|
740
|
+
'com.google.common.': 'Google通用库',
|
741
|
+
'org.apache.': 'Apache库',
|
742
|
+
'org.junit.': 'JUnit测试库',
|
743
|
+
'junit.': 'JUnit库'
|
744
|
+
}
|
745
|
+
|
746
|
+
for prefix, reason in system_packages.items():
|
747
|
+
if class_name.startswith(prefix):
|
748
|
+
return reason
|
749
|
+
|
750
|
+
return "未知原因"
|
751
|
+
|
403
752
|
def _resolve_field_dependency(self, type_name: str, current_package: str) -> Optional[str]:
|
404
753
|
"""
|
405
754
|
解析字段类型名为完整的类名
|
@@ -414,9 +763,13 @@ class ProtoReconstructor:
|
|
414
763
|
if not type_name:
|
415
764
|
return None
|
416
765
|
|
417
|
-
#
|
418
|
-
|
419
|
-
|
766
|
+
# 检查是否为基础类型
|
767
|
+
basic_proto_types = {
|
768
|
+
'string', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
|
769
|
+
'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'float', 'double', 'bytes'
|
770
|
+
}
|
771
|
+
|
772
|
+
if type_name in basic_proto_types:
|
420
773
|
return None
|
421
774
|
|
422
775
|
# 如果已经是完整类名,直接返回
|
@@ -431,7 +784,13 @@ class ProtoReconstructor:
|
|
431
784
|
# 如果推断失败,尝试查找所有可能的匹配类
|
432
785
|
# 需要传递当前类名以便进行源码分析
|
433
786
|
current_class = getattr(self, '_current_processing_class', None)
|
434
|
-
|
787
|
+
best_match = self._find_best_matching_class(type_name, current_package, current_class)
|
788
|
+
|
789
|
+
# 如果找到匹配,验证该类是否确实存在
|
790
|
+
if best_match and self._find_java_file(best_match):
|
791
|
+
return best_match
|
792
|
+
|
793
|
+
return None
|
435
794
|
|
436
795
|
def _find_java_file(self, class_name: str) -> Optional[Path]:
|
437
796
|
"""
|
@@ -450,10 +809,31 @@ class ProtoReconstructor:
|
|
450
809
|
if full_path.exists():
|
451
810
|
return full_path
|
452
811
|
|
812
|
+
# 处理内部类:支持多层嵌套
|
813
|
+
# com.example.Models$Inner$Deep -> com/example/Models$Inner$Deep.java
|
814
|
+
if '$' in class_name:
|
815
|
+
# 找到最后一个.的位置,分离包名和类名部分
|
816
|
+
last_dot_index = class_name.rfind('.')
|
817
|
+
if last_dot_index != -1:
|
818
|
+
package_path = class_name[:last_dot_index].replace('.', '/') # 包路径
|
819
|
+
class_part = class_name[last_dot_index + 1:] # 类名部分(可能包含多个$)
|
820
|
+
inner_class_file_path = f"{package_path}/{class_part}.java"
|
821
|
+
inner_class_full_path = self.sources_dir / inner_class_file_path
|
822
|
+
|
823
|
+
if inner_class_full_path.exists():
|
824
|
+
return inner_class_full_path
|
825
|
+
|
453
826
|
# 备选方案:按简单类名搜索
|
454
827
|
simple_name = class_name.split('.')[-1]
|
455
|
-
|
456
|
-
|
828
|
+
# 对于内部类,简单名称可能包含多个$符号
|
829
|
+
if '$' in simple_name:
|
830
|
+
# 对于内部类,直接使用包含$的完整文件名搜索
|
831
|
+
for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
|
832
|
+
return java_file
|
833
|
+
else:
|
834
|
+
# 对于普通类,使用原来的逻辑
|
835
|
+
for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
|
836
|
+
return java_file
|
457
837
|
|
458
838
|
return None
|
459
839
|
|
@@ -503,7 +883,8 @@ class ProtoReconstructor:
|
|
503
883
|
if len(package_parts) > 1:
|
504
884
|
parent = '.'.join(package_parts[:-1])
|
505
885
|
# 常见的同级包名
|
506
|
-
common_siblings = ['models', 'model', 'types', 'entities', 'data', 'proto', 'protobuf'
|
886
|
+
common_siblings = ['models', 'model', 'types', 'entities', 'data', 'proto', 'protobuf',
|
887
|
+
'enums', 'enum', 'common', 'shared', 'core', 'base']
|
507
888
|
for sibling in common_siblings:
|
508
889
|
if sibling != package_parts[-1]: # 避免重复
|
509
890
|
candidates.append(f"{parent}.{sibling}")
|
@@ -511,10 +892,32 @@ class ProtoReconstructor:
|
|
511
892
|
# 4. 根包下的常见子包
|
512
893
|
if len(package_parts) > 2:
|
513
894
|
root_package = '.'.join(package_parts[:2]) # 如 com.example
|
514
|
-
common_subpackages = ['models', 'model', 'types', 'entities', 'common', 'shared', 'proto'
|
895
|
+
common_subpackages = ['models', 'model', 'types', 'entities', 'common', 'shared', 'proto',
|
896
|
+
'enums', 'enum', 'core', 'base', 'data', 'dto', 'vo']
|
515
897
|
for subpkg in common_subpackages:
|
516
898
|
candidates.append(f"{root_package}.{subpkg}")
|
517
899
|
|
900
|
+
# 5. 深度搜索:在当前包的各级父包下寻找常见子包
|
901
|
+
for i in range(len(package_parts) - 1, 1, -1):
|
902
|
+
parent_package = '.'.join(package_parts[:i])
|
903
|
+
# 在每个父包下寻找常见的子包
|
904
|
+
search_patterns = ['models', 'enums', 'types', 'common', 'shared', 'core']
|
905
|
+
for pattern in search_patterns:
|
906
|
+
candidates.append(f"{parent_package}.{pattern}")
|
907
|
+
# 也尝试更深一层的组合
|
908
|
+
if i > 2:
|
909
|
+
candidates.append(f"{parent_package}.{pattern}.{package_parts[-1]}")
|
910
|
+
|
911
|
+
# 6. 特殊情况:如果当前是v1包,也尝试其他版本
|
912
|
+
if 'v1' in package_parts:
|
913
|
+
for i, part in enumerate(package_parts):
|
914
|
+
if part == 'v1':
|
915
|
+
# 尝试v2, v3等
|
916
|
+
for version in ['v2', 'v3', 'v4']:
|
917
|
+
version_package = package_parts.copy()
|
918
|
+
version_package[i] = version
|
919
|
+
candidates.append('.'.join(version_package))
|
920
|
+
|
518
921
|
# 去重并保持顺序
|
519
922
|
seen = set()
|
520
923
|
unique_candidates = []
|
@@ -527,7 +930,7 @@ class ProtoReconstructor:
|
|
527
930
|
|
528
931
|
def _find_best_matching_class(self, type_name: str, current_package: str, current_class: str = None) -> Optional[str]:
|
529
932
|
"""
|
530
|
-
|
933
|
+
查找最佳匹配的类(使用索引优化)
|
531
934
|
|
532
935
|
Args:
|
533
936
|
type_name: 类型名(如 IdData)
|
@@ -544,7 +947,33 @@ class ProtoReconstructor:
|
|
544
947
|
self.logger.info(f" 🔍 源码分析: {type_name} -> {actual_type}")
|
545
948
|
return actual_type
|
546
949
|
|
547
|
-
#
|
950
|
+
# 预检查:如果是基础字段名,可能不需要创建单独的类
|
951
|
+
if self._is_basic_field_type(type_name, current_class):
|
952
|
+
self.logger.info(f" 🔍 基础字段类型检测: {type_name} -> 跳过类匹配")
|
953
|
+
return None
|
954
|
+
|
955
|
+
# 使用类型索引进行快速匹配
|
956
|
+
best_match = self.type_index.find_best_match(type_name, current_package)
|
957
|
+
|
958
|
+
if best_match:
|
959
|
+
self.logger.info(f" 🔍 索引匹配: {type_name} -> {best_match}")
|
960
|
+
return best_match
|
961
|
+
|
962
|
+
# 索引未找到匹配,回退到传统方法(保留兼容性)
|
963
|
+
self.logger.debug(f" ⚠️ 索引未找到匹配,回退到目录扫描: {type_name}")
|
964
|
+
return self._fallback_directory_search(type_name, current_package)
|
965
|
+
|
966
|
+
def _fallback_directory_search(self, type_name: str, current_package: str) -> Optional[str]:
|
967
|
+
"""
|
968
|
+
回退的目录扫描方法(当索引匹配失败时使用)
|
969
|
+
|
970
|
+
Args:
|
971
|
+
type_name: 类型名
|
972
|
+
current_package: 当前包名
|
973
|
+
|
974
|
+
Returns:
|
975
|
+
匹配的类名或None
|
976
|
+
"""
|
548
977
|
matching_classes = []
|
549
978
|
|
550
979
|
# 在源码目录中搜索
|
@@ -557,7 +986,11 @@ class ProtoReconstructor:
|
|
557
986
|
if package_parts:
|
558
987
|
package_name = '.'.join(package_parts)
|
559
988
|
full_class_name = f"{package_name}.{file_name}"
|
560
|
-
|
989
|
+
|
990
|
+
# 添加包名过滤,避免匹配到无关的第三方库
|
991
|
+
if self._is_valid_package_for_matching(package_name, current_package):
|
992
|
+
similarity = self._calculate_package_similarity(package_name, current_package)
|
993
|
+
matching_classes.append((full_class_name, similarity))
|
561
994
|
|
562
995
|
if not matching_classes:
|
563
996
|
return None
|
@@ -566,9 +999,117 @@ class ProtoReconstructor:
|
|
566
999
|
matching_classes.sort(key=lambda x: x[1], reverse=True)
|
567
1000
|
best_match = matching_classes[0][0]
|
568
1001
|
|
569
|
-
self.logger.info(f" 🔍
|
1002
|
+
self.logger.info(f" 🔍 目录扫描匹配: {type_name} -> {best_match}")
|
570
1003
|
return best_match
|
571
1004
|
|
1005
|
+
def _is_basic_field_type(self, type_name: str, current_class: str = None) -> bool:
|
1006
|
+
"""
|
1007
|
+
检查是否为基础字段类型,避免为简单字段创建不必要的类
|
1008
|
+
|
1009
|
+
Args:
|
1010
|
+
type_name: 类型名
|
1011
|
+
current_class: 当前类名
|
1012
|
+
|
1013
|
+
Returns:
|
1014
|
+
是否为基础字段类型
|
1015
|
+
"""
|
1016
|
+
# 首先检查是否为Java基础类型
|
1017
|
+
basic_java_types = {
|
1018
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
1019
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
1020
|
+
'Byte', 'Short', 'Character'
|
1021
|
+
}
|
1022
|
+
|
1023
|
+
if type_name in basic_java_types:
|
1024
|
+
return True
|
1025
|
+
|
1026
|
+
# 常见的基础字段名模式
|
1027
|
+
basic_patterns = [
|
1028
|
+
'tags', # tags_ 字段通常是 repeated string
|
1029
|
+
'ids', # ids_ 字段通常是 repeated string 或 repeated int64
|
1030
|
+
'values', # values_ 字段通常是基础类型数组
|
1031
|
+
'names', # names_ 字段通常是 repeated string
|
1032
|
+
'urls', # urls_ 字段通常是 repeated string
|
1033
|
+
'emails', # emails_ 字段通常是 repeated string
|
1034
|
+
]
|
1035
|
+
|
1036
|
+
type_lower = type_name.lower()
|
1037
|
+
|
1038
|
+
# 检查是否匹配基础模式
|
1039
|
+
if type_lower in basic_patterns:
|
1040
|
+
return True
|
1041
|
+
|
1042
|
+
# 如果有当前类,尝试从Java源码中验证
|
1043
|
+
if current_class:
|
1044
|
+
try:
|
1045
|
+
java_file = self._find_java_file(current_class)
|
1046
|
+
if java_file:
|
1047
|
+
content = java_file.read_text(encoding='utf-8')
|
1048
|
+
|
1049
|
+
# 查找对应的字段声明,检查是否为基础类型
|
1050
|
+
field_name_pattern = type_lower.rstrip('s') + 's?_' # tags -> tags?_
|
1051
|
+
import re
|
1052
|
+
|
1053
|
+
# 查找字段声明:private List<String> tags_; 或 private Internal.ProtobufList<String> tags_;
|
1054
|
+
patterns = [
|
1055
|
+
rf'private\s+(?:Internal\.)?ProtobufList<String>\s+{field_name_pattern}',
|
1056
|
+
rf'private\s+List<String>\s+{field_name_pattern}',
|
1057
|
+
rf'private\s+(?:Internal\.)?ProtobufList<Integer>\s+{field_name_pattern}',
|
1058
|
+
rf'private\s+List<Integer>\s+{field_name_pattern}',
|
1059
|
+
rf'private\s+(?:Internal\.)?ProtobufList<Long>\s+{field_name_pattern}',
|
1060
|
+
rf'private\s+List<Long>\s+{field_name_pattern}',
|
1061
|
+
]
|
1062
|
+
|
1063
|
+
for pattern in patterns:
|
1064
|
+
if re.search(pattern, content, re.IGNORECASE):
|
1065
|
+
return True
|
1066
|
+
|
1067
|
+
except Exception as e:
|
1068
|
+
self.logger.debug(f" 检查基础字段类型时出错: {e}")
|
1069
|
+
|
1070
|
+
return False
|
1071
|
+
|
1072
|
+
def _is_valid_package_for_matching(self, candidate_package: str, current_package: str) -> bool:
|
1073
|
+
"""
|
1074
|
+
检查候选包名是否适合用于匹配
|
1075
|
+
|
1076
|
+
Args:
|
1077
|
+
candidate_package: 候选包名
|
1078
|
+
current_package: 当前包名
|
1079
|
+
|
1080
|
+
Returns:
|
1081
|
+
是否为有效的匹配候选
|
1082
|
+
"""
|
1083
|
+
# 获取当前包的根包名(通常是前两部分,如 com.truecaller)
|
1084
|
+
current_parts = current_package.split('.')
|
1085
|
+
if len(current_parts) >= 2:
|
1086
|
+
current_root = '.'.join(current_parts[:2])
|
1087
|
+
else:
|
1088
|
+
current_root = current_package
|
1089
|
+
|
1090
|
+
# 过滤规则
|
1091
|
+
filters = [
|
1092
|
+
# 1. 排除明显的第三方库
|
1093
|
+
lambda pkg: 'unity3d' not in pkg.lower(),
|
1094
|
+
lambda pkg: 'facebook' not in pkg.lower(),
|
1095
|
+
lambda pkg: 'google' not in pkg.lower() or pkg.startswith(current_root),
|
1096
|
+
lambda pkg: 'android' not in pkg.lower() or pkg.startswith(current_root),
|
1097
|
+
lambda pkg: 'androidx' not in pkg.lower(),
|
1098
|
+
lambda pkg: 'kotlin' not in pkg.lower(),
|
1099
|
+
lambda pkg: 'java' not in pkg.lower(),
|
1100
|
+
lambda pkg: 'javax' not in pkg.lower(),
|
1101
|
+
|
1102
|
+
# 2. 优先选择同根包的类
|
1103
|
+
lambda pkg: pkg.startswith(current_root) or self._calculate_package_similarity(pkg, current_package) > 0.3
|
1104
|
+
]
|
1105
|
+
|
1106
|
+
# 应用所有过滤规则
|
1107
|
+
for filter_func in filters:
|
1108
|
+
if not filter_func(candidate_package):
|
1109
|
+
return False
|
1110
|
+
|
1111
|
+
return True
|
1112
|
+
|
572
1113
|
def _calculate_package_similarity(self, package1: str, package2: str) -> float:
|
573
1114
|
"""
|
574
1115
|
计算两个包名的相似度
|
@@ -606,13 +1147,28 @@ class ProtoReconstructor:
|
|
606
1147
|
Returns:
|
607
1148
|
实际的完整类型名
|
608
1149
|
"""
|
1150
|
+
# 首先检查是否为基础类型,如果是则直接跳过
|
1151
|
+
basic_types = {
|
1152
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
1153
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
1154
|
+
'Byte', 'Short', 'Character'
|
1155
|
+
}
|
1156
|
+
|
1157
|
+
if inferred_type in basic_types:
|
1158
|
+
self.logger.debug(f" 跳过基础类型: {inferred_type}")
|
1159
|
+
return None
|
1160
|
+
|
609
1161
|
try:
|
610
1162
|
java_file = self._find_java_file(class_name)
|
611
1163
|
if not java_file:
|
612
1164
|
return None
|
613
1165
|
|
614
|
-
#
|
615
|
-
|
1166
|
+
# 使用缓存读取Java源码
|
1167
|
+
from utils.file_cache import get_file_cache
|
1168
|
+
file_cache = get_file_cache()
|
1169
|
+
content = file_cache.get_content(java_file)
|
1170
|
+
if not content:
|
1171
|
+
return None
|
616
1172
|
|
617
1173
|
# 查找字段声明模式:private SomeType fieldName_;
|
618
1174
|
# 我们要找的是以inferred_type结尾的类型声明
|
@@ -626,6 +1182,11 @@ class ProtoReconstructor:
|
|
626
1182
|
# 取第一个匹配的类型
|
627
1183
|
actual_type_simple = matches[0]
|
628
1184
|
|
1185
|
+
# 再次检查匹配的类型是否为基础类型
|
1186
|
+
if actual_type_simple in basic_types:
|
1187
|
+
self.logger.debug(f" 匹配到基础类型,跳过: {actual_type_simple}")
|
1188
|
+
return None
|
1189
|
+
|
629
1190
|
# 检查是否有import语句
|
630
1191
|
import_pattern = rf'import\s+([^;]*\.{re.escape(actual_type_simple)});'
|
631
1192
|
import_matches = re.findall(import_pattern, content)
|