reproto 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .git/COMMIT_EDITMSG +24 -19
- .git/index +0 -0
- .git/logs/HEAD +2 -0
- .git/logs/refs/heads/iyue +2 -0
- .git/logs/refs/remotes/gitlab/iyue +2 -0
- .git/logs/refs/remotes/origin/iyue +2 -0
- .git/objects/18/89a4fb55eb3abdc528ce87f0cea039278c06fd +0 -0
- .git/objects/20/cf56ec106bcd66420dd000279f983571b918b6 +0 -0
- .git/objects/21/55b64d52922c88527c102d62f23e5c2abbae79 +0 -0
- .git/objects/33/181441ab38eded005db356da89b54c7d29f452 +0 -0
- .git/objects/33/52dfa8f5d9eb46cc98ea7ccecf02e4d9df95f7 +0 -0
- .git/objects/3c/6f0120229cc2cd8123efbeb7f186eb0a485f29 +0 -0
- .git/objects/40/84f4567d983a977c49598b7d886e46b13ff50b +0 -0
- .git/objects/76/311aa8e59d780763e0d66787067cc5d9613a67 +0 -0
- .git/objects/8c/809c42c7ae13007fd885ee7bcffae7acf2c520 +0 -0
- .git/objects/97/56fe0931216a7c40cbf250e1ab8a6dfd589f13 +0 -0
- .git/objects/9a/e313cdf64cd82416c1238eb493e6396f799f12 +0 -0
- .git/objects/a5/b7e4e1b63bfb65288f6553687aaabcfb4d51b1 +0 -0
- .git/objects/cd/2d6c229438c6b1c694b9392a85888d89ef49c1 +0 -0
- .git/objects/e8/1433b6ad92206cdadbee1f474b4f99383314cb +0 -0
- .git/objects/e8/2f42ea26b8bf4f0bc92c0648ac8f190f14226d +0 -0
- .git/objects/e9/a15996cb55ac72aeb6611d26e8d22246589943 +0 -0
- .git/objects/f5/18c69a6e1bf3052b79da01502b2837ea58f0f4 +0 -0
- .git/objects/f7/25a430eb3364460ba854dbc8809edc21dc6c70 +0 -0
- .git/refs/heads/iyue +1 -1
- .git/refs/remotes/gitlab/iyue +1 -1
- .git/refs/remotes/origin/iyue +1 -1
- README.md +37 -117
- core/info_decoder.py +512 -105
- core/reconstructor.py +594 -75
- generation/proto_generator.py +25 -5
- main.py +38 -7
- parsing/java_parser.py +81 -1
- pyproject.toml +13 -2
- {reproto-0.0.5.dist-info → reproto-0.0.7.dist-info}/METADATA +47 -120
- {reproto-0.0.5.dist-info → reproto-0.0.7.dist-info}/RECORD +39 -21
- utils/logger.py +2 -2
- {reproto-0.0.5.dist-info → reproto-0.0.7.dist-info}/WHEEL +0 -0
- {reproto-0.0.5.dist-info → reproto-0.0.7.dist-info}/entry_points.txt +0 -0
core/reconstructor.py
CHANGED
@@ -65,7 +65,7 @@ class JavaSourceAnalyzer:
|
|
65
65
|
从Java源码中获取字段的真实类型
|
66
66
|
|
67
67
|
Args:
|
68
|
-
field_name_raw: 原始字段名(如
|
68
|
+
field_name_raw: 原始字段名(如 contacts_)
|
69
69
|
expected_type: 期望的基础类型(message、enum 或 map)
|
70
70
|
|
71
71
|
Returns:
|
@@ -77,40 +77,74 @@ class JavaSourceAnalyzer:
|
|
77
77
|
# 清理字段名
|
78
78
|
field_name = field_name_raw.rstrip('_')
|
79
79
|
|
80
|
-
#
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
#
|
111
|
-
|
112
|
-
|
113
|
-
|
80
|
+
# 查找字段声明模式,支持多种声明格式
|
81
|
+
patterns = [
|
82
|
+
# Internal.ProtobufList<Contact> contacts_ = ...
|
83
|
+
rf'private\s+Internal\.ProtobufList<([^>]+)>\s+{re.escape(field_name)}_\s*=',
|
84
|
+
# MapFieldLite<String, Contact> contacts_ = ...
|
85
|
+
rf'private\s+MapFieldLite<([^,]+),\s*([^>]+)>\s+{re.escape(field_name)}_\s*=',
|
86
|
+
# List<Contact> contacts_ = ...
|
87
|
+
rf'private\s+List<([^>]+)>\s+{re.escape(field_name)}_\s*=',
|
88
|
+
# Internal.IntList badges_ = ... (用于枚举列表)
|
89
|
+
rf'private\s+(Internal\.IntList)\s+{re.escape(field_name)}_\s*=',
|
90
|
+
# 普通字段声明: private Contact contact_ = ...
|
91
|
+
rf'private\s+(\w+(?:\.\w+)*)\s+{re.escape(field_name)}_\s*=',
|
92
|
+
# 简单字段声明: private Contact contact_;
|
93
|
+
rf'private\s+(\w+(?:\.\w+)*)\s+{re.escape(field_name)}_\s*;'
|
94
|
+
]
|
95
|
+
|
96
|
+
for i, pattern in enumerate(patterns):
|
97
|
+
matches = re.findall(pattern, self._current_class_content)
|
98
|
+
if matches:
|
99
|
+
if i == 0: # Internal.ProtobufList<Contact>
|
100
|
+
element_type = matches[0]
|
101
|
+
return f"Internal.ProtobufList<{element_type}>"
|
102
|
+
elif i == 1: # MapFieldLite<String, Contact>
|
103
|
+
key_type, value_type = matches[0]
|
104
|
+
return f"MapFieldLite<{key_type.strip()}, {value_type.strip()}>"
|
105
|
+
elif i == 2: # List<Contact>
|
106
|
+
element_type = matches[0]
|
107
|
+
return f"List<{element_type}>"
|
108
|
+
elif i == 3: # Internal.IntList
|
109
|
+
return "Internal.IntList"
|
110
|
+
else: # 普通类型
|
111
|
+
simple_type = matches[0]
|
112
|
+
|
113
|
+
# 检查是否为Java基础类型,如果是则直接返回
|
114
|
+
basic_java_types = {
|
115
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
116
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
117
|
+
'Byte', 'Short', 'Character'
|
118
|
+
}
|
119
|
+
|
120
|
+
if simple_type in basic_java_types:
|
121
|
+
return simple_type # 直接返回基础类型,不添加包名
|
122
|
+
|
123
|
+
# 如果字段声明是基础类型(如int),但期望类型是enum,尝试从setter方法获取真实类型
|
124
|
+
if expected_type == 'enum' and simple_type in ['int', 'long', 'short', 'byte']:
|
125
|
+
setter_type = self._get_type_from_setter(field_name)
|
126
|
+
if setter_type:
|
127
|
+
return setter_type
|
128
|
+
continue
|
129
|
+
|
130
|
+
# 特殊处理:Internal.IntList可能对应枚举列表
|
131
|
+
if simple_type == 'Internal.IntList':
|
132
|
+
# 检查是否有对应的枚举setter方法
|
133
|
+
enum_type = self._get_enum_type_from_list_setter(field_name)
|
134
|
+
if enum_type:
|
135
|
+
return f"Internal.ProtobufList<{enum_type}>"
|
136
|
+
|
137
|
+
# 查找import语句获取完整类名
|
138
|
+
import_pattern = rf'import\s+([^;]*\.{re.escape(simple_type)});'
|
139
|
+
import_matches = re.findall(import_pattern, self._current_class_content)
|
140
|
+
|
141
|
+
if import_matches:
|
142
|
+
return import_matches[0] # 返回完整的包名.类名
|
143
|
+
else:
|
144
|
+
# 如果没有import,假设在同一个包中
|
145
|
+
if self._current_class_name:
|
146
|
+
package_name = '.'.join(self._current_class_name.split('.')[:-1])
|
147
|
+
return f"{package_name}.{simple_type}"
|
114
148
|
|
115
149
|
return None
|
116
150
|
|
@@ -151,24 +185,38 @@ class JavaSourceAnalyzer:
|
|
151
185
|
Returns:
|
152
186
|
protobuf类型名
|
153
187
|
"""
|
188
|
+
if not java_type:
|
189
|
+
return 'string'
|
190
|
+
|
154
191
|
# 基础类型映射
|
155
192
|
basic_types = {
|
193
|
+
'int': 'int32',
|
194
|
+
'long': 'int64',
|
195
|
+
'float': 'float',
|
196
|
+
'double': 'double',
|
197
|
+
'boolean': 'bool',
|
156
198
|
'String': 'string',
|
157
|
-
'
|
158
|
-
'
|
159
|
-
'
|
160
|
-
'Float': 'float',
|
161
|
-
'Double': 'double',
|
162
|
-
'
|
199
|
+
'java.lang.String': 'string',
|
200
|
+
'java.lang.Integer': 'int32',
|
201
|
+
'java.lang.Long': 'int64',
|
202
|
+
'java.lang.Float': 'float',
|
203
|
+
'java.lang.Double': 'double',
|
204
|
+
'java.lang.Boolean': 'bool',
|
205
|
+
'byte[]': 'bytes',
|
206
|
+
'ByteString': 'bytes',
|
207
|
+
'com.google.protobuf.ByteString': 'bytes',
|
163
208
|
}
|
164
209
|
|
210
|
+
# 检查是否为基础类型
|
165
211
|
if java_type in basic_types:
|
166
212
|
return basic_types[java_type]
|
167
213
|
|
168
|
-
#
|
214
|
+
# 如果是完整的类名,提取简单类名
|
169
215
|
if '.' in java_type:
|
170
|
-
|
216
|
+
simple_name = java_type.split('.')[-1]
|
217
|
+
return simple_name
|
171
218
|
|
219
|
+
# 默认返回原类型名
|
172
220
|
return java_type
|
173
221
|
|
174
222
|
def _get_type_from_setter(self, field_name: str) -> Optional[str]:
|
@@ -184,8 +232,49 @@ class JavaSourceAnalyzer:
|
|
184
232
|
# 将字段名转换为setter方法名
|
185
233
|
setter_name = f"set{field_name[0].upper()}{field_name[1:]}"
|
186
234
|
|
187
|
-
#
|
188
|
-
|
235
|
+
# 查找私有setter方法:/* JADX INFO: Access modifiers changed from: private */
|
236
|
+
# public void setSpamType(SpamType spamType)
|
237
|
+
patterns = [
|
238
|
+
# 查找setter方法签名,支持public或private
|
239
|
+
rf'(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*(\w+)\s+\w+\s*\)',
|
240
|
+
# 也支持注释中的private标记
|
241
|
+
rf'\/\*[^*]*private[^*]*\*\/\s*(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*(\w+)\s+\w+\s*\)'
|
242
|
+
]
|
243
|
+
|
244
|
+
for pattern in patterns:
|
245
|
+
matches = re.findall(pattern, self._current_class_content, re.DOTALL)
|
246
|
+
if matches:
|
247
|
+
simple_type = matches[0]
|
248
|
+
|
249
|
+
# 查找import语句获取完整类名
|
250
|
+
import_pattern = rf'import\s+([^;]*\.{re.escape(simple_type)});'
|
251
|
+
import_matches = re.findall(import_pattern, self._current_class_content)
|
252
|
+
|
253
|
+
if import_matches:
|
254
|
+
return import_matches[0]
|
255
|
+
else:
|
256
|
+
# 如果没有import,假设在同一个包中
|
257
|
+
if self._current_class_name:
|
258
|
+
package_name = '.'.join(self._current_class_name.split('.')[:-1])
|
259
|
+
return f"{package_name}.{simple_type}"
|
260
|
+
|
261
|
+
return None
|
262
|
+
|
263
|
+
def _get_enum_type_from_list_setter(self, field_name: str) -> Optional[str]:
|
264
|
+
"""
|
265
|
+
从列表setter方法中获取枚举类型(如setBadges(int i10, Badge badge))
|
266
|
+
|
267
|
+
Args:
|
268
|
+
field_name: 字段名(如 badges)
|
269
|
+
|
270
|
+
Returns:
|
271
|
+
枚举类型名
|
272
|
+
"""
|
273
|
+
# 将字段名转换为setter方法名
|
274
|
+
setter_name = f"set{field_name[0].upper()}{field_name[1:]}"
|
275
|
+
|
276
|
+
# 查找列表setter方法:setBadges(int i10, Badge badge)
|
277
|
+
pattern = rf'(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*int\s+\w+,\s*(\w+)\s+\w+\s*\)'
|
189
278
|
matches = re.findall(pattern, self._current_class_content)
|
190
279
|
|
191
280
|
if matches:
|
@@ -249,38 +338,52 @@ class ProtoReconstructor:
|
|
249
338
|
|
250
339
|
# 初始化核心组件
|
251
340
|
self.java_parser = JavaParser() # Java文件解析器
|
252
|
-
#
|
253
|
-
self.
|
254
|
-
self.info_decoder = InfoDecoder(self.java_source_analyzer) # 字节码解码器
|
341
|
+
self.enum_parser = EnumParser(str(sources_dir)) # 枚举解析器需要字符串路径
|
342
|
+
self.info_decoder = InfoDecoder()
|
255
343
|
self.proto_generator = ProtoGenerator() # Proto文件生成器
|
256
344
|
|
345
|
+
# 初始化Java源码分析器
|
346
|
+
self.java_source_analyzer = JavaSourceAnalyzer(sources_dir)
|
347
|
+
self.info_decoder.java_source_analyzer = self.java_source_analyzer
|
348
|
+
|
257
349
|
# 任务调度状态
|
258
350
|
self.processed_classes: Set[str] = set() # 已处理的类
|
259
351
|
self.pending_classes: deque = deque() # 待处理的类队列
|
260
352
|
self.message_definitions: Dict[str, MessageDefinition] = {} # 消息定义
|
261
353
|
self.enum_definitions: Dict[str, EnumDefinition] = {} # 枚举定义
|
262
354
|
|
355
|
+
# 错误和状态跟踪
|
356
|
+
self.failed_classes: Dict[str, str] = {} # 失败的类 -> 失败原因
|
357
|
+
self.skipped_classes: Dict[str, str] = {} # 跳过的类 -> 跳过原因
|
358
|
+
|
359
|
+
# 当前处理的类名(用于调试)
|
360
|
+
self._current_processing_class = None
|
361
|
+
|
263
362
|
def reconstruct_from_root(self, root_class: str) -> Dict[str, any]:
|
264
363
|
"""
|
265
|
-
|
364
|
+
从根类开始重构protobuf定义
|
266
365
|
|
267
366
|
Args:
|
268
|
-
root_class:
|
367
|
+
root_class: 根类的完整名称
|
269
368
|
|
270
369
|
Returns:
|
271
|
-
|
370
|
+
包含统计信息的字典
|
272
371
|
"""
|
273
|
-
self.logger.info(f"开始重构,根类: {root_class}")
|
372
|
+
self.logger.info(f"🚀 开始重构,根类: {root_class}")
|
274
373
|
|
275
|
-
#
|
374
|
+
# 1. 添加根类到处理队列
|
276
375
|
self.pending_classes.append(root_class)
|
277
376
|
|
278
|
-
#
|
377
|
+
# 2. 处理所有消息类
|
279
378
|
self._process_all_classes()
|
280
379
|
|
281
|
-
|
380
|
+
# 3. 解析所有枚举类
|
381
|
+
self._process_all_enums()
|
382
|
+
|
383
|
+
# 4. 生成proto文件
|
282
384
|
self._generate_all_proto_files()
|
283
385
|
|
386
|
+
# 5. 返回统计信息
|
284
387
|
# 报告未知类型统计
|
285
388
|
self._report_unknown_types()
|
286
389
|
|
@@ -304,6 +407,35 @@ class ProtoReconstructor:
|
|
304
407
|
self.logger.info(f"处理类: {class_name}")
|
305
408
|
self._process_single_class(class_name)
|
306
409
|
|
410
|
+
def _process_all_enums(self) -> None:
|
411
|
+
"""解析目标包下的所有枚举类"""
|
412
|
+
self.logger.info("🔢 开始解析枚举类...")
|
413
|
+
|
414
|
+
# 从已处理的类中推断目标包名
|
415
|
+
target_package = None
|
416
|
+
if self.message_definitions:
|
417
|
+
# 取第一个消息定义的包名
|
418
|
+
first_message = next(iter(self.message_definitions.values()))
|
419
|
+
target_package = first_message.package_name
|
420
|
+
elif self.processed_classes:
|
421
|
+
# 从已处理的类名中推断包名
|
422
|
+
first_class = next(iter(self.processed_classes))
|
423
|
+
target_package = '.'.join(first_class.split('.')[:-1])
|
424
|
+
|
425
|
+
if not target_package:
|
426
|
+
self.logger.warning("⚠️ 无法推断目标包名,跳过枚举解析")
|
427
|
+
return
|
428
|
+
|
429
|
+
# 解析目标包下的所有枚举
|
430
|
+
enum_definitions = self.enum_parser.parse_all_enums(target_package)
|
431
|
+
|
432
|
+
# 存储枚举定义
|
433
|
+
for enum_def in enum_definitions:
|
434
|
+
self.enum_definitions[enum_def.full_name] = enum_def
|
435
|
+
self.logger.info(f" ✅ 解析枚举: {enum_def.name} ({len(enum_def.values)} 个值)")
|
436
|
+
|
437
|
+
self.logger.info(f"📊 枚举解析完成,共解析 {len(enum_definitions)} 个枚举")
|
438
|
+
|
307
439
|
def _process_single_class(self, class_name: str) -> None:
|
308
440
|
"""
|
309
441
|
处理单个Java类
|
@@ -317,10 +449,19 @@ class ProtoReconstructor:
|
|
317
449
|
self.java_source_analyzer.set_current_class(class_name)
|
318
450
|
|
319
451
|
try:
|
452
|
+
# 检查是否应该跳过这个类
|
453
|
+
if self._should_skip_class(class_name):
|
454
|
+
skip_reason = self._get_skip_reason(class_name)
|
455
|
+
self.skipped_classes[class_name] = skip_reason
|
456
|
+
self.logger.info(f" ⏭️ 跳过类: {class_name} ({skip_reason})")
|
457
|
+
return
|
458
|
+
|
320
459
|
# 1. 查找Java文件
|
321
460
|
java_file_path = self._find_java_file(class_name)
|
322
461
|
if not java_file_path:
|
323
|
-
|
462
|
+
error_msg = "找不到对应的Java文件"
|
463
|
+
self.failed_classes[class_name] = error_msg
|
464
|
+
self.logger.warning(f" ❌ {error_msg}: {class_name}")
|
324
465
|
return
|
325
466
|
|
326
467
|
# 2. 尝试解析为枚举
|
@@ -335,12 +476,14 @@ class ProtoReconstructor:
|
|
335
476
|
# 3. 尝试解析为消息类
|
336
477
|
info_string, objects_array = self.java_parser.parse_java_file(java_file_path)
|
337
478
|
if not info_string:
|
338
|
-
|
479
|
+
error_msg = "无法从Java文件中提取protobuf信息"
|
480
|
+
self.failed_classes[class_name] = error_msg
|
481
|
+
self.logger.warning(f" ❌ {error_msg}: {class_name}")
|
339
482
|
return
|
340
483
|
|
341
484
|
# 4. 解码字节码为消息定义
|
342
485
|
message_def = self.info_decoder.decode_message_info(
|
343
|
-
class_name, info_string, objects_array
|
486
|
+
class_name, info_string, objects_array, java_file_path
|
344
487
|
)
|
345
488
|
|
346
489
|
if message_def:
|
@@ -350,10 +493,16 @@ class ProtoReconstructor:
|
|
350
493
|
# 5. 发现并添加依赖类到队列
|
351
494
|
self._discover_dependencies(message_def)
|
352
495
|
else:
|
353
|
-
|
496
|
+
error_msg = "字节码解码失败,可能不是protobuf消息类"
|
497
|
+
self.failed_classes[class_name] = error_msg
|
498
|
+
self.logger.warning(f" ❌ {error_msg}: {class_name}")
|
354
499
|
|
355
500
|
except Exception as e:
|
356
|
-
|
501
|
+
error_msg = f"处理异常: {str(e)}"
|
502
|
+
self.failed_classes[class_name] = error_msg
|
503
|
+
self.logger.error(f" ❌ {error_msg}: {class_name}")
|
504
|
+
if hasattr(self, '_verbose') and self._verbose:
|
505
|
+
self.logger.exception(f"详细异常信息 ({class_name}):")
|
357
506
|
finally:
|
358
507
|
# 无论成功失败都标记为已处理,避免无限循环
|
359
508
|
self.processed_classes.add(class_name)
|
@@ -387,19 +536,202 @@ class ProtoReconstructor:
|
|
387
536
|
|
388
537
|
# 从常规字段提取依赖
|
389
538
|
for field in message_def.fields:
|
390
|
-
|
391
|
-
|
392
|
-
dependencies.append(dep)
|
539
|
+
deps = self._extract_field_dependencies(field.type_name, message_def.package_name)
|
540
|
+
dependencies.extend(deps)
|
393
541
|
|
394
542
|
# 从oneof字段提取依赖
|
395
543
|
for oneof in message_def.oneofs:
|
396
544
|
for field in oneof.fields:
|
397
|
-
|
398
|
-
|
399
|
-
|
545
|
+
deps = self._extract_field_dependencies(field.type_name, message_def.package_name)
|
546
|
+
dependencies.extend(deps)
|
547
|
+
|
548
|
+
# 去重
|
549
|
+
return list(set(dependencies))
|
550
|
+
|
551
|
+
def _extract_field_dependencies(self, type_name: str, current_package: str) -> List[str]:
|
552
|
+
"""
|
553
|
+
从字段类型中提取所有依赖(包括map类型的键值类型)
|
554
|
+
|
555
|
+
Args:
|
556
|
+
type_name: 字段类型名
|
557
|
+
current_package: 当前包名
|
558
|
+
|
559
|
+
Returns:
|
560
|
+
依赖类名列表
|
561
|
+
"""
|
562
|
+
dependencies = []
|
563
|
+
|
564
|
+
if not type_name:
|
565
|
+
return dependencies
|
566
|
+
|
567
|
+
# 处理map类型: map<string, Contact> -> [Contact]
|
568
|
+
if type_name.startswith('map<') and type_name.endswith('>'):
|
569
|
+
map_content = type_name[4:-1] # 移除 'map<' 和 '>'
|
570
|
+
# 分割键值类型,处理嵌套的尖括号
|
571
|
+
key_type, value_type = self._parse_map_types(map_content)
|
572
|
+
|
573
|
+
# 递归处理键类型和值类型
|
574
|
+
dependencies.extend(self._extract_field_dependencies(key_type, current_package))
|
575
|
+
dependencies.extend(self._extract_field_dependencies(value_type, current_package))
|
576
|
+
|
577
|
+
# 处理普通类型
|
578
|
+
else:
|
579
|
+
dep = self._resolve_field_dependency(type_name, current_package)
|
580
|
+
if dep:
|
581
|
+
dependencies.append(dep)
|
400
582
|
|
401
583
|
return dependencies
|
402
584
|
|
585
|
+
def _parse_map_types(self, map_content: str) -> tuple:
|
586
|
+
"""
|
587
|
+
解析map类型的键值类型
|
588
|
+
|
589
|
+
Args:
|
590
|
+
map_content: map内容,如 "string, Contact" 或 "string, List<Contact>"
|
591
|
+
|
592
|
+
Returns:
|
593
|
+
(key_type, value_type) 元组
|
594
|
+
"""
|
595
|
+
# 简单情况:没有嵌套的尖括号
|
596
|
+
if '<' not in map_content:
|
597
|
+
parts = [part.strip() for part in map_content.split(',', 1)]
|
598
|
+
if len(parts) == 2:
|
599
|
+
return parts[0], parts[1]
|
600
|
+
|
601
|
+
# 复杂情况:处理嵌套的尖括号
|
602
|
+
bracket_count = 0
|
603
|
+
for i, char in enumerate(map_content):
|
604
|
+
if char == '<':
|
605
|
+
bracket_count += 1
|
606
|
+
elif char == '>':
|
607
|
+
bracket_count -= 1
|
608
|
+
elif char == ',' and bracket_count == 0:
|
609
|
+
# 找到分隔符
|
610
|
+
key_type = map_content[:i].strip()
|
611
|
+
value_type = map_content[i+1:].strip()
|
612
|
+
return key_type, value_type
|
613
|
+
|
614
|
+
# 如果解析失败,返回默认值
|
615
|
+
return 'string', 'string'
|
616
|
+
|
617
|
+
def _should_skip_class(self, class_name: str) -> bool:
|
618
|
+
"""
|
619
|
+
判断是否应该跳过某个类
|
620
|
+
|
621
|
+
Args:
|
622
|
+
class_name: 类名
|
623
|
+
|
624
|
+
Returns:
|
625
|
+
是否应该跳过
|
626
|
+
"""
|
627
|
+
# 跳过已经处理过的类
|
628
|
+
if class_name in self.processed_classes:
|
629
|
+
return True
|
630
|
+
|
631
|
+
# 跳过基础类型(包括Java基础类型和常见的系统类型)
|
632
|
+
basic_types = {
|
633
|
+
# Java基础类型
|
634
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
635
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
636
|
+
'Byte', 'Short', 'Character',
|
637
|
+
# Java系统类型
|
638
|
+
'java.lang.String', 'java.lang.Integer', 'java.lang.Long',
|
639
|
+
'java.lang.Boolean', 'java.lang.Float', 'java.lang.Double',
|
640
|
+
'java.lang.Object', 'java.util.List', 'java.util.Map',
|
641
|
+
'com.google.protobuf.ByteString', 'com.google.protobuf.MessageLite'
|
642
|
+
}
|
643
|
+
|
644
|
+
if class_name in basic_types:
|
645
|
+
return True
|
646
|
+
|
647
|
+
# 跳过明显的系统类型和内部类型
|
648
|
+
if self._is_system_or_internal_type(class_name):
|
649
|
+
return True
|
650
|
+
|
651
|
+
return False
|
652
|
+
|
653
|
+
def _is_system_or_internal_type(self, class_name: str) -> bool:
|
654
|
+
"""
|
655
|
+
判断是否为系统类型或内部类型
|
656
|
+
|
657
|
+
Args:
|
658
|
+
class_name: 类名
|
659
|
+
|
660
|
+
Returns:
|
661
|
+
是否为系统或内部类型
|
662
|
+
"""
|
663
|
+
# 跳过明显不是protobuf类的包
|
664
|
+
skip_packages = [
|
665
|
+
'java.', 'javax.', 'android.', 'androidx.',
|
666
|
+
'kotlin.', 'kotlinx.', 'com.google.common.',
|
667
|
+
'org.apache.', 'org.junit.', 'junit.',
|
668
|
+
'com.unity3d.', # 添加Unity3D包,避免误匹配
|
669
|
+
'Internal.' # 跳过Internal包下的类型
|
670
|
+
]
|
671
|
+
|
672
|
+
for skip_pkg in skip_packages:
|
673
|
+
if class_name.startswith(skip_pkg):
|
674
|
+
return True
|
675
|
+
|
676
|
+
# 跳过明显的内部类型
|
677
|
+
internal_patterns = [
|
678
|
+
'Internal.ProtobufList',
|
679
|
+
'MapFieldLite',
|
680
|
+
'GeneratedMessageLite',
|
681
|
+
'MessageLiteOrBuilder'
|
682
|
+
]
|
683
|
+
|
684
|
+
for pattern in internal_patterns:
|
685
|
+
if pattern in class_name:
|
686
|
+
return True
|
687
|
+
|
688
|
+
return False
|
689
|
+
|
690
|
+
def _get_skip_reason(self, class_name: str) -> str:
|
691
|
+
"""
|
692
|
+
获取跳过类的原因
|
693
|
+
|
694
|
+
Args:
|
695
|
+
class_name: 类名
|
696
|
+
|
697
|
+
Returns:
|
698
|
+
跳过原因
|
699
|
+
"""
|
700
|
+
# 基础类型
|
701
|
+
basic_types = {
|
702
|
+
'java.lang.String', 'java.lang.Integer', 'java.lang.Long',
|
703
|
+
'java.lang.Boolean', 'java.lang.Float', 'java.lang.Double',
|
704
|
+
'java.lang.Object', 'java.util.List', 'java.util.Map',
|
705
|
+
'com.google.protobuf.ByteString', 'com.google.protobuf.MessageLite'
|
706
|
+
}
|
707
|
+
|
708
|
+
if class_name in basic_types:
|
709
|
+
return "基础类型"
|
710
|
+
|
711
|
+
# 已处理
|
712
|
+
if class_name in self.processed_classes:
|
713
|
+
return "已处理"
|
714
|
+
|
715
|
+
# 系统包
|
716
|
+
system_packages = {
|
717
|
+
'java.': 'Java系统包',
|
718
|
+
'javax.': 'Java扩展包',
|
719
|
+
'android.': 'Android系统包',
|
720
|
+
'androidx.': 'AndroidX包',
|
721
|
+
'kotlin.': 'Kotlin标准库',
|
722
|
+
'kotlinx.': 'Kotlin扩展库',
|
723
|
+
'com.google.common.': 'Google通用库',
|
724
|
+
'org.apache.': 'Apache库',
|
725
|
+
'org.junit.': 'JUnit测试库',
|
726
|
+
'junit.': 'JUnit库'
|
727
|
+
}
|
728
|
+
|
729
|
+
for prefix, reason in system_packages.items():
|
730
|
+
if class_name.startswith(prefix):
|
731
|
+
return reason
|
732
|
+
|
733
|
+
return "未知原因"
|
734
|
+
|
403
735
|
def _resolve_field_dependency(self, type_name: str, current_package: str) -> Optional[str]:
|
404
736
|
"""
|
405
737
|
解析字段类型名为完整的类名
|
@@ -414,9 +746,13 @@ class ProtoReconstructor:
|
|
414
746
|
if not type_name:
|
415
747
|
return None
|
416
748
|
|
417
|
-
#
|
418
|
-
|
419
|
-
|
749
|
+
# 检查是否为基础类型
|
750
|
+
basic_proto_types = {
|
751
|
+
'string', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
|
752
|
+
'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'float', 'double', 'bytes'
|
753
|
+
}
|
754
|
+
|
755
|
+
if type_name in basic_proto_types:
|
420
756
|
return None
|
421
757
|
|
422
758
|
# 如果已经是完整类名,直接返回
|
@@ -431,7 +767,13 @@ class ProtoReconstructor:
|
|
431
767
|
# 如果推断失败,尝试查找所有可能的匹配类
|
432
768
|
# 需要传递当前类名以便进行源码分析
|
433
769
|
current_class = getattr(self, '_current_processing_class', None)
|
434
|
-
|
770
|
+
best_match = self._find_best_matching_class(type_name, current_package, current_class)
|
771
|
+
|
772
|
+
# 如果找到匹配,验证该类是否确实存在
|
773
|
+
if best_match and self._find_java_file(best_match):
|
774
|
+
return best_match
|
775
|
+
|
776
|
+
return None
|
435
777
|
|
436
778
|
def _find_java_file(self, class_name: str) -> Optional[Path]:
|
437
779
|
"""
|
@@ -450,10 +792,31 @@ class ProtoReconstructor:
|
|
450
792
|
if full_path.exists():
|
451
793
|
return full_path
|
452
794
|
|
795
|
+
# 处理内部类:支持多层嵌套
|
796
|
+
# com.example.Models$Inner$Deep -> com/example/Models$Inner$Deep.java
|
797
|
+
if '$' in class_name:
|
798
|
+
# 找到最后一个.的位置,分离包名和类名部分
|
799
|
+
last_dot_index = class_name.rfind('.')
|
800
|
+
if last_dot_index != -1:
|
801
|
+
package_path = class_name[:last_dot_index].replace('.', '/') # 包路径
|
802
|
+
class_part = class_name[last_dot_index + 1:] # 类名部分(可能包含多个$)
|
803
|
+
inner_class_file_path = f"{package_path}/{class_part}.java"
|
804
|
+
inner_class_full_path = self.sources_dir / inner_class_file_path
|
805
|
+
|
806
|
+
if inner_class_full_path.exists():
|
807
|
+
return inner_class_full_path
|
808
|
+
|
453
809
|
# 备选方案:按简单类名搜索
|
454
810
|
simple_name = class_name.split('.')[-1]
|
455
|
-
|
456
|
-
|
811
|
+
# 对于内部类,简单名称可能包含多个$符号
|
812
|
+
if '$' in simple_name:
|
813
|
+
# 对于内部类,直接使用包含$的完整文件名搜索
|
814
|
+
for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
|
815
|
+
return java_file
|
816
|
+
else:
|
817
|
+
# 对于普通类,使用原来的逻辑
|
818
|
+
for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
|
819
|
+
return java_file
|
457
820
|
|
458
821
|
return None
|
459
822
|
|
@@ -503,7 +866,8 @@ class ProtoReconstructor:
|
|
503
866
|
if len(package_parts) > 1:
|
504
867
|
parent = '.'.join(package_parts[:-1])
|
505
868
|
# 常见的同级包名
|
506
|
-
common_siblings = ['models', 'model', 'types', 'entities', 'data', 'proto', 'protobuf'
|
869
|
+
common_siblings = ['models', 'model', 'types', 'entities', 'data', 'proto', 'protobuf',
|
870
|
+
'enums', 'enum', 'common', 'shared', 'core', 'base']
|
507
871
|
for sibling in common_siblings:
|
508
872
|
if sibling != package_parts[-1]: # 避免重复
|
509
873
|
candidates.append(f"{parent}.{sibling}")
|
@@ -511,10 +875,32 @@ class ProtoReconstructor:
|
|
511
875
|
# 4. 根包下的常见子包
|
512
876
|
if len(package_parts) > 2:
|
513
877
|
root_package = '.'.join(package_parts[:2]) # 如 com.example
|
514
|
-
common_subpackages = ['models', 'model', 'types', 'entities', 'common', 'shared', 'proto'
|
878
|
+
common_subpackages = ['models', 'model', 'types', 'entities', 'common', 'shared', 'proto',
|
879
|
+
'enums', 'enum', 'core', 'base', 'data', 'dto', 'vo']
|
515
880
|
for subpkg in common_subpackages:
|
516
881
|
candidates.append(f"{root_package}.{subpkg}")
|
517
882
|
|
883
|
+
# 5. 深度搜索:在当前包的各级父包下寻找常见子包
|
884
|
+
for i in range(len(package_parts) - 1, 1, -1):
|
885
|
+
parent_package = '.'.join(package_parts[:i])
|
886
|
+
# 在每个父包下寻找常见的子包
|
887
|
+
search_patterns = ['models', 'enums', 'types', 'common', 'shared', 'core']
|
888
|
+
for pattern in search_patterns:
|
889
|
+
candidates.append(f"{parent_package}.{pattern}")
|
890
|
+
# 也尝试更深一层的组合
|
891
|
+
if i > 2:
|
892
|
+
candidates.append(f"{parent_package}.{pattern}.{package_parts[-1]}")
|
893
|
+
|
894
|
+
# 6. 特殊情况:如果当前是v1包,也尝试其他版本
|
895
|
+
if 'v1' in package_parts:
|
896
|
+
for i, part in enumerate(package_parts):
|
897
|
+
if part == 'v1':
|
898
|
+
# 尝试v2, v3等
|
899
|
+
for version in ['v2', 'v3', 'v4']:
|
900
|
+
version_package = package_parts.copy()
|
901
|
+
version_package[i] = version
|
902
|
+
candidates.append('.'.join(version_package))
|
903
|
+
|
518
904
|
# 去重并保持顺序
|
519
905
|
seen = set()
|
520
906
|
unique_candidates = []
|
@@ -544,6 +930,11 @@ class ProtoReconstructor:
|
|
544
930
|
self.logger.info(f" 🔍 源码分析: {type_name} -> {actual_type}")
|
545
931
|
return actual_type
|
546
932
|
|
933
|
+
# 预检查:如果是基础字段名,可能不需要创建单独的类
|
934
|
+
if self._is_basic_field_type(type_name, current_class):
|
935
|
+
self.logger.info(f" 🔍 基础字段类型检测: {type_name} -> 跳过类匹配")
|
936
|
+
return None
|
937
|
+
|
547
938
|
# 如果源码分析失败,回退到模糊匹配
|
548
939
|
matching_classes = []
|
549
940
|
|
@@ -557,7 +948,11 @@ class ProtoReconstructor:
|
|
557
948
|
if package_parts:
|
558
949
|
package_name = '.'.join(package_parts)
|
559
950
|
full_class_name = f"{package_name}.{file_name}"
|
560
|
-
|
951
|
+
|
952
|
+
# 添加包名过滤,避免匹配到无关的第三方库
|
953
|
+
if self._is_valid_package_for_matching(package_name, current_package):
|
954
|
+
similarity = self._calculate_package_similarity(package_name, current_package)
|
955
|
+
matching_classes.append((full_class_name, similarity))
|
561
956
|
|
562
957
|
if not matching_classes:
|
563
958
|
return None
|
@@ -569,6 +964,114 @@ class ProtoReconstructor:
|
|
569
964
|
self.logger.info(f" 🔍 智能匹配: {type_name} -> {best_match}")
|
570
965
|
return best_match
|
571
966
|
|
967
|
+
def _is_basic_field_type(self, type_name: str, current_class: str = None) -> bool:
|
968
|
+
"""
|
969
|
+
检查是否为基础字段类型,避免为简单字段创建不必要的类
|
970
|
+
|
971
|
+
Args:
|
972
|
+
type_name: 类型名
|
973
|
+
current_class: 当前类名
|
974
|
+
|
975
|
+
Returns:
|
976
|
+
是否为基础字段类型
|
977
|
+
"""
|
978
|
+
# 首先检查是否为Java基础类型
|
979
|
+
basic_java_types = {
|
980
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
981
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
982
|
+
'Byte', 'Short', 'Character'
|
983
|
+
}
|
984
|
+
|
985
|
+
if type_name in basic_java_types:
|
986
|
+
return True
|
987
|
+
|
988
|
+
# 常见的基础字段名模式
|
989
|
+
basic_patterns = [
|
990
|
+
'tags', # tags_ 字段通常是 repeated string
|
991
|
+
'ids', # ids_ 字段通常是 repeated string 或 repeated int64
|
992
|
+
'values', # values_ 字段通常是基础类型数组
|
993
|
+
'names', # names_ 字段通常是 repeated string
|
994
|
+
'urls', # urls_ 字段通常是 repeated string
|
995
|
+
'emails', # emails_ 字段通常是 repeated string
|
996
|
+
]
|
997
|
+
|
998
|
+
type_lower = type_name.lower()
|
999
|
+
|
1000
|
+
# 检查是否匹配基础模式
|
1001
|
+
if type_lower in basic_patterns:
|
1002
|
+
return True
|
1003
|
+
|
1004
|
+
# 如果有当前类,尝试从Java源码中验证
|
1005
|
+
if current_class:
|
1006
|
+
try:
|
1007
|
+
java_file = self._find_java_file(current_class)
|
1008
|
+
if java_file:
|
1009
|
+
content = java_file.read_text(encoding='utf-8')
|
1010
|
+
|
1011
|
+
# 查找对应的字段声明,检查是否为基础类型
|
1012
|
+
field_name_pattern = type_lower.rstrip('s') + 's?_' # tags -> tags?_
|
1013
|
+
import re
|
1014
|
+
|
1015
|
+
# 查找字段声明:private List<String> tags_; 或 private Internal.ProtobufList<String> tags_;
|
1016
|
+
patterns = [
|
1017
|
+
rf'private\s+(?:Internal\.)?ProtobufList<String>\s+{field_name_pattern}',
|
1018
|
+
rf'private\s+List<String>\s+{field_name_pattern}',
|
1019
|
+
rf'private\s+(?:Internal\.)?ProtobufList<Integer>\s+{field_name_pattern}',
|
1020
|
+
rf'private\s+List<Integer>\s+{field_name_pattern}',
|
1021
|
+
rf'private\s+(?:Internal\.)?ProtobufList<Long>\s+{field_name_pattern}',
|
1022
|
+
rf'private\s+List<Long>\s+{field_name_pattern}',
|
1023
|
+
]
|
1024
|
+
|
1025
|
+
for pattern in patterns:
|
1026
|
+
if re.search(pattern, content, re.IGNORECASE):
|
1027
|
+
return True
|
1028
|
+
|
1029
|
+
except Exception as e:
|
1030
|
+
self.logger.debug(f" 检查基础字段类型时出错: {e}")
|
1031
|
+
|
1032
|
+
return False
|
1033
|
+
|
1034
|
+
def _is_valid_package_for_matching(self, candidate_package: str, current_package: str) -> bool:
|
1035
|
+
"""
|
1036
|
+
检查候选包名是否适合用于匹配
|
1037
|
+
|
1038
|
+
Args:
|
1039
|
+
candidate_package: 候选包名
|
1040
|
+
current_package: 当前包名
|
1041
|
+
|
1042
|
+
Returns:
|
1043
|
+
是否为有效的匹配候选
|
1044
|
+
"""
|
1045
|
+
# 获取当前包的根包名(通常是前两部分,如 com.truecaller)
|
1046
|
+
current_parts = current_package.split('.')
|
1047
|
+
if len(current_parts) >= 2:
|
1048
|
+
current_root = '.'.join(current_parts[:2])
|
1049
|
+
else:
|
1050
|
+
current_root = current_package
|
1051
|
+
|
1052
|
+
# 过滤规则
|
1053
|
+
filters = [
|
1054
|
+
# 1. 排除明显的第三方库
|
1055
|
+
lambda pkg: 'unity3d' not in pkg.lower(),
|
1056
|
+
lambda pkg: 'facebook' not in pkg.lower(),
|
1057
|
+
lambda pkg: 'google' not in pkg.lower() or pkg.startswith(current_root),
|
1058
|
+
lambda pkg: 'android' not in pkg.lower() or pkg.startswith(current_root),
|
1059
|
+
lambda pkg: 'androidx' not in pkg.lower(),
|
1060
|
+
lambda pkg: 'kotlin' not in pkg.lower(),
|
1061
|
+
lambda pkg: 'java' not in pkg.lower(),
|
1062
|
+
lambda pkg: 'javax' not in pkg.lower(),
|
1063
|
+
|
1064
|
+
# 2. 优先选择同根包的类
|
1065
|
+
lambda pkg: pkg.startswith(current_root) or self._calculate_package_similarity(pkg, current_package) > 0.3
|
1066
|
+
]
|
1067
|
+
|
1068
|
+
# 应用所有过滤规则
|
1069
|
+
for filter_func in filters:
|
1070
|
+
if not filter_func(candidate_package):
|
1071
|
+
return False
|
1072
|
+
|
1073
|
+
return True
|
1074
|
+
|
572
1075
|
def _calculate_package_similarity(self, package1: str, package2: str) -> float:
|
573
1076
|
"""
|
574
1077
|
计算两个包名的相似度
|
@@ -606,6 +1109,17 @@ class ProtoReconstructor:
|
|
606
1109
|
Returns:
|
607
1110
|
实际的完整类型名
|
608
1111
|
"""
|
1112
|
+
# 首先检查是否为基础类型,如果是则直接跳过
|
1113
|
+
basic_types = {
|
1114
|
+
'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
|
1115
|
+
'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
|
1116
|
+
'Byte', 'Short', 'Character'
|
1117
|
+
}
|
1118
|
+
|
1119
|
+
if inferred_type in basic_types:
|
1120
|
+
self.logger.debug(f" 跳过基础类型: {inferred_type}")
|
1121
|
+
return None
|
1122
|
+
|
609
1123
|
try:
|
610
1124
|
java_file = self._find_java_file(class_name)
|
611
1125
|
if not java_file:
|
@@ -626,6 +1140,11 @@ class ProtoReconstructor:
|
|
626
1140
|
# 取第一个匹配的类型
|
627
1141
|
actual_type_simple = matches[0]
|
628
1142
|
|
1143
|
+
# 再次检查匹配的类型是否为基础类型
|
1144
|
+
if actual_type_simple in basic_types:
|
1145
|
+
self.logger.debug(f" 匹配到基础类型,跳过: {actual_type_simple}")
|
1146
|
+
return None
|
1147
|
+
|
629
1148
|
# 检查是否有import语句
|
630
1149
|
import_pattern = rf'import\s+([^;]*\.{re.escape(actual_type_simple)});'
|
631
1150
|
import_matches = re.findall(import_pattern, content)
|