reproto 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. .git/COMMIT_EDITMSG +24 -19
  2. .git/index +0 -0
  3. .git/logs/HEAD +2 -0
  4. .git/logs/refs/heads/iyue +2 -0
  5. .git/logs/refs/remotes/gitlab/iyue +2 -0
  6. .git/logs/refs/remotes/origin/iyue +2 -0
  7. .git/objects/18/89a4fb55eb3abdc528ce87f0cea039278c06fd +0 -0
  8. .git/objects/20/cf56ec106bcd66420dd000279f983571b918b6 +0 -0
  9. .git/objects/21/55b64d52922c88527c102d62f23e5c2abbae79 +0 -0
  10. .git/objects/33/181441ab38eded005db356da89b54c7d29f452 +0 -0
  11. .git/objects/33/52dfa8f5d9eb46cc98ea7ccecf02e4d9df95f7 +0 -0
  12. .git/objects/3c/6f0120229cc2cd8123efbeb7f186eb0a485f29 +0 -0
  13. .git/objects/40/84f4567d983a977c49598b7d886e46b13ff50b +0 -0
  14. .git/objects/76/311aa8e59d780763e0d66787067cc5d9613a67 +0 -0
  15. .git/objects/8c/809c42c7ae13007fd885ee7bcffae7acf2c520 +0 -0
  16. .git/objects/97/56fe0931216a7c40cbf250e1ab8a6dfd589f13 +0 -0
  17. .git/objects/9a/e313cdf64cd82416c1238eb493e6396f799f12 +0 -0
  18. .git/objects/a5/b7e4e1b63bfb65288f6553687aaabcfb4d51b1 +0 -0
  19. .git/objects/cd/2d6c229438c6b1c694b9392a85888d89ef49c1 +0 -0
  20. .git/objects/e8/1433b6ad92206cdadbee1f474b4f99383314cb +0 -0
  21. .git/objects/e8/2f42ea26b8bf4f0bc92c0648ac8f190f14226d +0 -0
  22. .git/objects/e9/a15996cb55ac72aeb6611d26e8d22246589943 +0 -0
  23. .git/objects/f5/18c69a6e1bf3052b79da01502b2837ea58f0f4 +0 -0
  24. .git/objects/f7/25a430eb3364460ba854dbc8809edc21dc6c70 +0 -0
  25. .git/refs/heads/iyue +1 -1
  26. .git/refs/remotes/gitlab/iyue +1 -1
  27. .git/refs/remotes/origin/iyue +1 -1
  28. README.md +37 -117
  29. core/info_decoder.py +512 -105
  30. core/reconstructor.py +594 -75
  31. generation/proto_generator.py +25 -5
  32. main.py +38 -7
  33. parsing/java_parser.py +81 -1
  34. pyproject.toml +13 -2
  35. {reproto-0.0.5.dist-info → reproto-0.0.7.dist-info}/METADATA +47 -120
  36. {reproto-0.0.5.dist-info → reproto-0.0.7.dist-info}/RECORD +39 -21
  37. utils/logger.py +2 -2
  38. {reproto-0.0.5.dist-info → reproto-0.0.7.dist-info}/WHEEL +0 -0
  39. {reproto-0.0.5.dist-info → reproto-0.0.7.dist-info}/entry_points.txt +0 -0
core/reconstructor.py CHANGED
@@ -65,7 +65,7 @@ class JavaSourceAnalyzer:
65
65
  从Java源码中获取字段的真实类型
66
66
 
67
67
  Args:
68
- field_name_raw: 原始字段名(如 id_
68
+ field_name_raw: 原始字段名(如 contacts_
69
69
  expected_type: 期望的基础类型(message、enum 或 map)
70
70
 
71
71
  Returns:
@@ -77,40 +77,74 @@ class JavaSourceAnalyzer:
77
77
  # 清理字段名
78
78
  field_name = field_name_raw.rstrip('_')
79
79
 
80
- # 对于map类型,特殊处理MapFieldLite声明
81
- if expected_type == 'map':
82
- map_type = self._get_map_type_from_field(field_name)
83
- if map_type:
84
- return map_type
85
-
86
- # 对于枚举类型,优先从setter方法中获取类型
87
- if expected_type == 'enum':
88
- setter_type = self._get_type_from_setter(field_name)
89
- if setter_type:
90
- return setter_type
91
-
92
- # 查找字段声明模式:private SomeType fieldName_;
93
- pattern = rf'private\s+(\w+)\s+{re.escape(field_name)}_\s*;'
94
- matches = re.findall(pattern, self._current_class_content)
95
-
96
- if matches:
97
- simple_type = matches[0]
98
-
99
- # 如果字段声明是基础类型(如int),但期望类型是enum,跳过
100
- if expected_type == 'enum' and simple_type in ['int', 'long', 'short', 'byte']:
101
- return None
102
-
103
- # 查找import语句获取完整类名
104
- import_pattern = rf'import\s+([^;]*\.{re.escape(simple_type)});'
105
- import_matches = re.findall(import_pattern, self._current_class_content)
106
-
107
- if import_matches:
108
- return import_matches[0] # 返回完整的包名.类名
109
- else:
110
- # 如果没有import,假设在同一个包中
111
- if self._current_class_name:
112
- package_name = '.'.join(self._current_class_name.split('.')[:-1])
113
- return f"{package_name}.{simple_type}"
80
+ # 查找字段声明模式,支持多种声明格式
81
+ patterns = [
82
+ # Internal.ProtobufList<Contact> contacts_ = ...
83
+ rf'private\s+Internal\.ProtobufList<([^>]+)>\s+{re.escape(field_name)}_\s*=',
84
+ # MapFieldLite<String, Contact> contacts_ = ...
85
+ rf'private\s+MapFieldLite<([^,]+),\s*([^>]+)>\s+{re.escape(field_name)}_\s*=',
86
+ # List<Contact> contacts_ = ...
87
+ rf'private\s+List<([^>]+)>\s+{re.escape(field_name)}_\s*=',
88
+ # Internal.IntList badges_ = ... (用于枚举列表)
89
+ rf'private\s+(Internal\.IntList)\s+{re.escape(field_name)}_\s*=',
90
+ # 普通字段声明: private Contact contact_ = ...
91
+ rf'private\s+(\w+(?:\.\w+)*)\s+{re.escape(field_name)}_\s*=',
92
+ # 简单字段声明: private Contact contact_;
93
+ rf'private\s+(\w+(?:\.\w+)*)\s+{re.escape(field_name)}_\s*;'
94
+ ]
95
+
96
+ for i, pattern in enumerate(patterns):
97
+ matches = re.findall(pattern, self._current_class_content)
98
+ if matches:
99
+ if i == 0: # Internal.ProtobufList<Contact>
100
+ element_type = matches[0]
101
+ return f"Internal.ProtobufList<{element_type}>"
102
+ elif i == 1: # MapFieldLite<String, Contact>
103
+ key_type, value_type = matches[0]
104
+ return f"MapFieldLite<{key_type.strip()}, {value_type.strip()}>"
105
+ elif i == 2: # List<Contact>
106
+ element_type = matches[0]
107
+ return f"List<{element_type}>"
108
+ elif i == 3: # Internal.IntList
109
+ return "Internal.IntList"
110
+ else: # 普通类型
111
+ simple_type = matches[0]
112
+
113
+ # 检查是否为Java基础类型,如果是则直接返回
114
+ basic_java_types = {
115
+ 'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
116
+ 'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
117
+ 'Byte', 'Short', 'Character'
118
+ }
119
+
120
+ if simple_type in basic_java_types:
121
+ return simple_type # 直接返回基础类型,不添加包名
122
+
123
+ # 如果字段声明是基础类型(如int),但期望类型是enum,尝试从setter方法获取真实类型
124
+ if expected_type == 'enum' and simple_type in ['int', 'long', 'short', 'byte']:
125
+ setter_type = self._get_type_from_setter(field_name)
126
+ if setter_type:
127
+ return setter_type
128
+ continue
129
+
130
+ # 特殊处理:Internal.IntList可能对应枚举列表
131
+ if simple_type == 'Internal.IntList':
132
+ # 检查是否有对应的枚举setter方法
133
+ enum_type = self._get_enum_type_from_list_setter(field_name)
134
+ if enum_type:
135
+ return f"Internal.ProtobufList<{enum_type}>"
136
+
137
+ # 查找import语句获取完整类名
138
+ import_pattern = rf'import\s+([^;]*\.{re.escape(simple_type)});'
139
+ import_matches = re.findall(import_pattern, self._current_class_content)
140
+
141
+ if import_matches:
142
+ return import_matches[0] # 返回完整的包名.类名
143
+ else:
144
+ # 如果没有import,假设在同一个包中
145
+ if self._current_class_name:
146
+ package_name = '.'.join(self._current_class_name.split('.')[:-1])
147
+ return f"{package_name}.{simple_type}"
114
148
 
115
149
  return None
116
150
 
@@ -151,24 +185,38 @@ class JavaSourceAnalyzer:
151
185
  Returns:
152
186
  protobuf类型名
153
187
  """
188
+ if not java_type:
189
+ return 'string'
190
+
154
191
  # 基础类型映射
155
192
  basic_types = {
193
+ 'int': 'int32',
194
+ 'long': 'int64',
195
+ 'float': 'float',
196
+ 'double': 'double',
197
+ 'boolean': 'bool',
156
198
  'String': 'string',
157
- 'Integer': 'int32',
158
- 'Long': 'int64',
159
- 'Boolean': 'bool',
160
- 'Float': 'float',
161
- 'Double': 'double',
162
- 'ByteString': 'bytes'
199
+ 'java.lang.String': 'string',
200
+ 'java.lang.Integer': 'int32',
201
+ 'java.lang.Long': 'int64',
202
+ 'java.lang.Float': 'float',
203
+ 'java.lang.Double': 'double',
204
+ 'java.lang.Boolean': 'bool',
205
+ 'byte[]': 'bytes',
206
+ 'ByteString': 'bytes',
207
+ 'com.google.protobuf.ByteString': 'bytes',
163
208
  }
164
209
 
210
+ # 检查是否为基础类型
165
211
  if java_type in basic_types:
166
212
  return basic_types[java_type]
167
213
 
168
- # 对于其他类型,去掉包名,只保留类名
214
+ # 如果是完整的类名,提取简单类名
169
215
  if '.' in java_type:
170
- return java_type.split('.')[-1]
216
+ simple_name = java_type.split('.')[-1]
217
+ return simple_name
171
218
 
219
+ # 默认返回原类型名
172
220
  return java_type
173
221
 
174
222
  def _get_type_from_setter(self, field_name: str) -> Optional[str]:
@@ -184,8 +232,49 @@ class JavaSourceAnalyzer:
184
232
  # 将字段名转换为setter方法名
185
233
  setter_name = f"set{field_name[0].upper()}{field_name[1:]}"
186
234
 
187
- # 查找setter方法:public void setSpamType(SpamType spamType)
188
- pattern = rf'public\s+void\s+{re.escape(setter_name)}\s*\(\s*(\w+)\s+\w+\s*\)'
235
+ # 查找私有setter方法:/* JADX INFO: Access modifiers changed from: private */
236
+ # public void setSpamType(SpamType spamType)
237
+ patterns = [
238
+ # 查找setter方法签名,支持public或private
239
+ rf'(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*(\w+)\s+\w+\s*\)',
240
+ # 也支持注释中的private标记
241
+ rf'\/\*[^*]*private[^*]*\*\/\s*(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*(\w+)\s+\w+\s*\)'
242
+ ]
243
+
244
+ for pattern in patterns:
245
+ matches = re.findall(pattern, self._current_class_content, re.DOTALL)
246
+ if matches:
247
+ simple_type = matches[0]
248
+
249
+ # 查找import语句获取完整类名
250
+ import_pattern = rf'import\s+([^;]*\.{re.escape(simple_type)});'
251
+ import_matches = re.findall(import_pattern, self._current_class_content)
252
+
253
+ if import_matches:
254
+ return import_matches[0]
255
+ else:
256
+ # 如果没有import,假设在同一个包中
257
+ if self._current_class_name:
258
+ package_name = '.'.join(self._current_class_name.split('.')[:-1])
259
+ return f"{package_name}.{simple_type}"
260
+
261
+ return None
262
+
263
+ def _get_enum_type_from_list_setter(self, field_name: str) -> Optional[str]:
264
+ """
265
+ 从列表setter方法中获取枚举类型(如setBadges(int i10, Badge badge))
266
+
267
+ Args:
268
+ field_name: 字段名(如 badges)
269
+
270
+ Returns:
271
+ 枚举类型名
272
+ """
273
+ # 将字段名转换为setter方法名
274
+ setter_name = f"set{field_name[0].upper()}{field_name[1:]}"
275
+
276
+ # 查找列表setter方法:setBadges(int i10, Badge badge)
277
+ pattern = rf'(?:public|private)\s+void\s+{re.escape(setter_name)}\s*\(\s*int\s+\w+,\s*(\w+)\s+\w+\s*\)'
189
278
  matches = re.findall(pattern, self._current_class_content)
190
279
 
191
280
  if matches:
@@ -249,38 +338,52 @@ class ProtoReconstructor:
249
338
 
250
339
  # 初始化核心组件
251
340
  self.java_parser = JavaParser() # Java文件解析器
252
- # 创建Java源码分析器并传递给InfoDecoder
253
- self.java_source_analyzer = JavaSourceAnalyzer(sources_dir)
254
- self.info_decoder = InfoDecoder(self.java_source_analyzer) # 字节码解码器
341
+ self.enum_parser = EnumParser(str(sources_dir)) # 枚举解析器需要字符串路径
342
+ self.info_decoder = InfoDecoder()
255
343
  self.proto_generator = ProtoGenerator() # Proto文件生成器
256
344
 
345
+ # 初始化Java源码分析器
346
+ self.java_source_analyzer = JavaSourceAnalyzer(sources_dir)
347
+ self.info_decoder.java_source_analyzer = self.java_source_analyzer
348
+
257
349
  # 任务调度状态
258
350
  self.processed_classes: Set[str] = set() # 已处理的类
259
351
  self.pending_classes: deque = deque() # 待处理的类队列
260
352
  self.message_definitions: Dict[str, MessageDefinition] = {} # 消息定义
261
353
  self.enum_definitions: Dict[str, EnumDefinition] = {} # 枚举定义
262
354
 
355
+ # 错误和状态跟踪
356
+ self.failed_classes: Dict[str, str] = {} # 失败的类 -> 失败原因
357
+ self.skipped_classes: Dict[str, str] = {} # 跳过的类 -> 跳过原因
358
+
359
+ # 当前处理的类名(用于调试)
360
+ self._current_processing_class = None
361
+
263
362
  def reconstruct_from_root(self, root_class: str) -> Dict[str, any]:
264
363
  """
265
- 从根类开始重构所有相关的proto文件
364
+ 从根类开始重构protobuf定义
266
365
 
267
366
  Args:
268
- root_class: 根类的完整类名,如 'com.example.Model'
367
+ root_class: 根类的完整名称
269
368
 
270
369
  Returns:
271
- 重构结果字典
370
+ 包含统计信息的字典
272
371
  """
273
- self.logger.info(f"开始重构,根类: {root_class}")
372
+ self.logger.info(f"🚀 开始重构,根类: {root_class}")
274
373
 
275
- # 启动任务队列
374
+ # 1. 添加根类到处理队列
276
375
  self.pending_classes.append(root_class)
277
376
 
278
- # 广度优先处理所有依赖类
377
+ # 2. 处理所有消息类
279
378
  self._process_all_classes()
280
379
 
281
- # 生成最终的proto文件
380
+ # 3. 解析所有枚举类
381
+ self._process_all_enums()
382
+
383
+ # 4. 生成proto文件
282
384
  self._generate_all_proto_files()
283
385
 
386
+ # 5. 返回统计信息
284
387
  # 报告未知类型统计
285
388
  self._report_unknown_types()
286
389
 
@@ -304,6 +407,35 @@ class ProtoReconstructor:
304
407
  self.logger.info(f"处理类: {class_name}")
305
408
  self._process_single_class(class_name)
306
409
 
410
+ def _process_all_enums(self) -> None:
411
+ """解析目标包下的所有枚举类"""
412
+ self.logger.info("🔢 开始解析枚举类...")
413
+
414
+ # 从已处理的类中推断目标包名
415
+ target_package = None
416
+ if self.message_definitions:
417
+ # 取第一个消息定义的包名
418
+ first_message = next(iter(self.message_definitions.values()))
419
+ target_package = first_message.package_name
420
+ elif self.processed_classes:
421
+ # 从已处理的类名中推断包名
422
+ first_class = next(iter(self.processed_classes))
423
+ target_package = '.'.join(first_class.split('.')[:-1])
424
+
425
+ if not target_package:
426
+ self.logger.warning("⚠️ 无法推断目标包名,跳过枚举解析")
427
+ return
428
+
429
+ # 解析目标包下的所有枚举
430
+ enum_definitions = self.enum_parser.parse_all_enums(target_package)
431
+
432
+ # 存储枚举定义
433
+ for enum_def in enum_definitions:
434
+ self.enum_definitions[enum_def.full_name] = enum_def
435
+ self.logger.info(f" ✅ 解析枚举: {enum_def.name} ({len(enum_def.values)} 个值)")
436
+
437
+ self.logger.info(f"📊 枚举解析完成,共解析 {len(enum_definitions)} 个枚举")
438
+
307
439
  def _process_single_class(self, class_name: str) -> None:
308
440
  """
309
441
  处理单个Java类
@@ -317,10 +449,19 @@ class ProtoReconstructor:
317
449
  self.java_source_analyzer.set_current_class(class_name)
318
450
 
319
451
  try:
452
+ # 检查是否应该跳过这个类
453
+ if self._should_skip_class(class_name):
454
+ skip_reason = self._get_skip_reason(class_name)
455
+ self.skipped_classes[class_name] = skip_reason
456
+ self.logger.info(f" ⏭️ 跳过类: {class_name} ({skip_reason})")
457
+ return
458
+
320
459
  # 1. 查找Java文件
321
460
  java_file_path = self._find_java_file(class_name)
322
461
  if not java_file_path:
323
- self.logger.info(f" ⚠️ 找不到Java文件: {class_name}")
462
+ error_msg = "找不到对应的Java文件"
463
+ self.failed_classes[class_name] = error_msg
464
+ self.logger.warning(f" ❌ {error_msg}: {class_name}")
324
465
  return
325
466
 
326
467
  # 2. 尝试解析为枚举
@@ -335,12 +476,14 @@ class ProtoReconstructor:
335
476
  # 3. 尝试解析为消息类
336
477
  info_string, objects_array = self.java_parser.parse_java_file(java_file_path)
337
478
  if not info_string:
338
- self.logger.info(f" ⚠️ 无法解析Java文件: {class_name}")
479
+ error_msg = "无法从Java文件中提取protobuf信息"
480
+ self.failed_classes[class_name] = error_msg
481
+ self.logger.warning(f" ❌ {error_msg}: {class_name}")
339
482
  return
340
483
 
341
484
  # 4. 解码字节码为消息定义
342
485
  message_def = self.info_decoder.decode_message_info(
343
- class_name, info_string, objects_array
486
+ class_name, info_string, objects_array, java_file_path
344
487
  )
345
488
 
346
489
  if message_def:
@@ -350,10 +493,16 @@ class ProtoReconstructor:
350
493
  # 5. 发现并添加依赖类到队列
351
494
  self._discover_dependencies(message_def)
352
495
  else:
353
- self.logger.info(f" ❌ 解码失败: {class_name}")
496
+ error_msg = "字节码解码失败,可能不是protobuf消息类"
497
+ self.failed_classes[class_name] = error_msg
498
+ self.logger.warning(f" ❌ {error_msg}: {class_name}")
354
499
 
355
500
  except Exception as e:
356
- self.logger.error(f"处理异常: {class_name} - {e}")
501
+ error_msg = f"处理异常: {str(e)}"
502
+ self.failed_classes[class_name] = error_msg
503
+ self.logger.error(f" ❌ {error_msg}: {class_name}")
504
+ if hasattr(self, '_verbose') and self._verbose:
505
+ self.logger.exception(f"详细异常信息 ({class_name}):")
357
506
  finally:
358
507
  # 无论成功失败都标记为已处理,避免无限循环
359
508
  self.processed_classes.add(class_name)
@@ -387,19 +536,202 @@ class ProtoReconstructor:
387
536
 
388
537
  # 从常规字段提取依赖
389
538
  for field in message_def.fields:
390
- dep = self._resolve_field_dependency(field.type_name, message_def.package_name)
391
- if dep:
392
- dependencies.append(dep)
539
+ deps = self._extract_field_dependencies(field.type_name, message_def.package_name)
540
+ dependencies.extend(deps)
393
541
 
394
542
  # 从oneof字段提取依赖
395
543
  for oneof in message_def.oneofs:
396
544
  for field in oneof.fields:
397
- dep = self._resolve_field_dependency(field.type_name, message_def.package_name)
398
- if dep:
399
- dependencies.append(dep)
545
+ deps = self._extract_field_dependencies(field.type_name, message_def.package_name)
546
+ dependencies.extend(deps)
547
+
548
+ # 去重
549
+ return list(set(dependencies))
550
+
551
+ def _extract_field_dependencies(self, type_name: str, current_package: str) -> List[str]:
552
+ """
553
+ 从字段类型中提取所有依赖(包括map类型的键值类型)
554
+
555
+ Args:
556
+ type_name: 字段类型名
557
+ current_package: 当前包名
558
+
559
+ Returns:
560
+ 依赖类名列表
561
+ """
562
+ dependencies = []
563
+
564
+ if not type_name:
565
+ return dependencies
566
+
567
+ # 处理map类型: map<string, Contact> -> [Contact]
568
+ if type_name.startswith('map<') and type_name.endswith('>'):
569
+ map_content = type_name[4:-1] # 移除 'map<' 和 '>'
570
+ # 分割键值类型,处理嵌套的尖括号
571
+ key_type, value_type = self._parse_map_types(map_content)
572
+
573
+ # 递归处理键类型和值类型
574
+ dependencies.extend(self._extract_field_dependencies(key_type, current_package))
575
+ dependencies.extend(self._extract_field_dependencies(value_type, current_package))
576
+
577
+ # 处理普通类型
578
+ else:
579
+ dep = self._resolve_field_dependency(type_name, current_package)
580
+ if dep:
581
+ dependencies.append(dep)
400
582
 
401
583
  return dependencies
402
584
 
585
+ def _parse_map_types(self, map_content: str) -> tuple:
586
+ """
587
+ 解析map类型的键值类型
588
+
589
+ Args:
590
+ map_content: map内容,如 "string, Contact" 或 "string, List<Contact>"
591
+
592
+ Returns:
593
+ (key_type, value_type) 元组
594
+ """
595
+ # 简单情况:没有嵌套的尖括号
596
+ if '<' not in map_content:
597
+ parts = [part.strip() for part in map_content.split(',', 1)]
598
+ if len(parts) == 2:
599
+ return parts[0], parts[1]
600
+
601
+ # 复杂情况:处理嵌套的尖括号
602
+ bracket_count = 0
603
+ for i, char in enumerate(map_content):
604
+ if char == '<':
605
+ bracket_count += 1
606
+ elif char == '>':
607
+ bracket_count -= 1
608
+ elif char == ',' and bracket_count == 0:
609
+ # 找到分隔符
610
+ key_type = map_content[:i].strip()
611
+ value_type = map_content[i+1:].strip()
612
+ return key_type, value_type
613
+
614
+ # 如果解析失败,返回默认值
615
+ return 'string', 'string'
616
+
617
+ def _should_skip_class(self, class_name: str) -> bool:
618
+ """
619
+ 判断是否应该跳过某个类
620
+
621
+ Args:
622
+ class_name: 类名
623
+
624
+ Returns:
625
+ 是否应该跳过
626
+ """
627
+ # 跳过已经处理过的类
628
+ if class_name in self.processed_classes:
629
+ return True
630
+
631
+ # 跳过基础类型(包括Java基础类型和常见的系统类型)
632
+ basic_types = {
633
+ # Java基础类型
634
+ 'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
635
+ 'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
636
+ 'Byte', 'Short', 'Character',
637
+ # Java系统类型
638
+ 'java.lang.String', 'java.lang.Integer', 'java.lang.Long',
639
+ 'java.lang.Boolean', 'java.lang.Float', 'java.lang.Double',
640
+ 'java.lang.Object', 'java.util.List', 'java.util.Map',
641
+ 'com.google.protobuf.ByteString', 'com.google.protobuf.MessageLite'
642
+ }
643
+
644
+ if class_name in basic_types:
645
+ return True
646
+
647
+ # 跳过明显的系统类型和内部类型
648
+ if self._is_system_or_internal_type(class_name):
649
+ return True
650
+
651
+ return False
652
+
653
+ def _is_system_or_internal_type(self, class_name: str) -> bool:
654
+ """
655
+ 判断是否为系统类型或内部类型
656
+
657
+ Args:
658
+ class_name: 类名
659
+
660
+ Returns:
661
+ 是否为系统或内部类型
662
+ """
663
+ # 跳过明显不是protobuf类的包
664
+ skip_packages = [
665
+ 'java.', 'javax.', 'android.', 'androidx.',
666
+ 'kotlin.', 'kotlinx.', 'com.google.common.',
667
+ 'org.apache.', 'org.junit.', 'junit.',
668
+ 'com.unity3d.', # 添加Unity3D包,避免误匹配
669
+ 'Internal.' # 跳过Internal包下的类型
670
+ ]
671
+
672
+ for skip_pkg in skip_packages:
673
+ if class_name.startswith(skip_pkg):
674
+ return True
675
+
676
+ # 跳过明显的内部类型
677
+ internal_patterns = [
678
+ 'Internal.ProtobufList',
679
+ 'MapFieldLite',
680
+ 'GeneratedMessageLite',
681
+ 'MessageLiteOrBuilder'
682
+ ]
683
+
684
+ for pattern in internal_patterns:
685
+ if pattern in class_name:
686
+ return True
687
+
688
+ return False
689
+
690
+ def _get_skip_reason(self, class_name: str) -> str:
691
+ """
692
+ 获取跳过类的原因
693
+
694
+ Args:
695
+ class_name: 类名
696
+
697
+ Returns:
698
+ 跳过原因
699
+ """
700
+ # 基础类型
701
+ basic_types = {
702
+ 'java.lang.String', 'java.lang.Integer', 'java.lang.Long',
703
+ 'java.lang.Boolean', 'java.lang.Float', 'java.lang.Double',
704
+ 'java.lang.Object', 'java.util.List', 'java.util.Map',
705
+ 'com.google.protobuf.ByteString', 'com.google.protobuf.MessageLite'
706
+ }
707
+
708
+ if class_name in basic_types:
709
+ return "基础类型"
710
+
711
+ # 已处理
712
+ if class_name in self.processed_classes:
713
+ return "已处理"
714
+
715
+ # 系统包
716
+ system_packages = {
717
+ 'java.': 'Java系统包',
718
+ 'javax.': 'Java扩展包',
719
+ 'android.': 'Android系统包',
720
+ 'androidx.': 'AndroidX包',
721
+ 'kotlin.': 'Kotlin标准库',
722
+ 'kotlinx.': 'Kotlin扩展库',
723
+ 'com.google.common.': 'Google通用库',
724
+ 'org.apache.': 'Apache库',
725
+ 'org.junit.': 'JUnit测试库',
726
+ 'junit.': 'JUnit库'
727
+ }
728
+
729
+ for prefix, reason in system_packages.items():
730
+ if class_name.startswith(prefix):
731
+ return reason
732
+
733
+ return "未知原因"
734
+
403
735
  def _resolve_field_dependency(self, type_name: str, current_package: str) -> Optional[str]:
404
736
  """
405
737
  解析字段类型名为完整的类名
@@ -414,9 +746,13 @@ class ProtoReconstructor:
414
746
  if not type_name:
415
747
  return None
416
748
 
417
- # 跳过基础类型
418
- basic_types = {'string', 'int32', 'int64', 'bool', 'float', 'double', 'bytes', 'message', 'enum'}
419
- if type_name in basic_types:
749
+ # 检查是否为基础类型
750
+ basic_proto_types = {
751
+ 'string', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
752
+ 'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'float', 'double', 'bytes'
753
+ }
754
+
755
+ if type_name in basic_proto_types:
420
756
  return None
421
757
 
422
758
  # 如果已经是完整类名,直接返回
@@ -431,7 +767,13 @@ class ProtoReconstructor:
431
767
  # 如果推断失败,尝试查找所有可能的匹配类
432
768
  # 需要传递当前类名以便进行源码分析
433
769
  current_class = getattr(self, '_current_processing_class', None)
434
- return self._find_best_matching_class(type_name, current_package, current_class)
770
+ best_match = self._find_best_matching_class(type_name, current_package, current_class)
771
+
772
+ # 如果找到匹配,验证该类是否确实存在
773
+ if best_match and self._find_java_file(best_match):
774
+ return best_match
775
+
776
+ return None
435
777
 
436
778
  def _find_java_file(self, class_name: str) -> Optional[Path]:
437
779
  """
@@ -450,10 +792,31 @@ class ProtoReconstructor:
450
792
  if full_path.exists():
451
793
  return full_path
452
794
 
795
+ # 处理内部类:支持多层嵌套
796
+ # com.example.Models$Inner$Deep -> com/example/Models$Inner$Deep.java
797
+ if '$' in class_name:
798
+ # 找到最后一个.的位置,分离包名和类名部分
799
+ last_dot_index = class_name.rfind('.')
800
+ if last_dot_index != -1:
801
+ package_path = class_name[:last_dot_index].replace('.', '/') # 包路径
802
+ class_part = class_name[last_dot_index + 1:] # 类名部分(可能包含多个$)
803
+ inner_class_file_path = f"{package_path}/{class_part}.java"
804
+ inner_class_full_path = self.sources_dir / inner_class_file_path
805
+
806
+ if inner_class_full_path.exists():
807
+ return inner_class_full_path
808
+
453
809
  # 备选方案:按简单类名搜索
454
810
  simple_name = class_name.split('.')[-1]
455
- for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
456
- return java_file
811
+ # 对于内部类,简单名称可能包含多个$符号
812
+ if '$' in simple_name:
813
+ # 对于内部类,直接使用包含$的完整文件名搜索
814
+ for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
815
+ return java_file
816
+ else:
817
+ # 对于普通类,使用原来的逻辑
818
+ for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
819
+ return java_file
457
820
 
458
821
  return None
459
822
 
@@ -503,7 +866,8 @@ class ProtoReconstructor:
503
866
  if len(package_parts) > 1:
504
867
  parent = '.'.join(package_parts[:-1])
505
868
  # 常见的同级包名
506
- common_siblings = ['models', 'model', 'types', 'entities', 'data', 'proto', 'protobuf']
869
+ common_siblings = ['models', 'model', 'types', 'entities', 'data', 'proto', 'protobuf',
870
+ 'enums', 'enum', 'common', 'shared', 'core', 'base']
507
871
  for sibling in common_siblings:
508
872
  if sibling != package_parts[-1]: # 避免重复
509
873
  candidates.append(f"{parent}.{sibling}")
@@ -511,10 +875,32 @@ class ProtoReconstructor:
511
875
  # 4. 根包下的常见子包
512
876
  if len(package_parts) > 2:
513
877
  root_package = '.'.join(package_parts[:2]) # 如 com.example
514
- common_subpackages = ['models', 'model', 'types', 'entities', 'common', 'shared', 'proto']
878
+ common_subpackages = ['models', 'model', 'types', 'entities', 'common', 'shared', 'proto',
879
+ 'enums', 'enum', 'core', 'base', 'data', 'dto', 'vo']
515
880
  for subpkg in common_subpackages:
516
881
  candidates.append(f"{root_package}.{subpkg}")
517
882
 
883
+ # 5. 深度搜索:在当前包的各级父包下寻找常见子包
884
+ for i in range(len(package_parts) - 1, 1, -1):
885
+ parent_package = '.'.join(package_parts[:i])
886
+ # 在每个父包下寻找常见的子包
887
+ search_patterns = ['models', 'enums', 'types', 'common', 'shared', 'core']
888
+ for pattern in search_patterns:
889
+ candidates.append(f"{parent_package}.{pattern}")
890
+ # 也尝试更深一层的组合
891
+ if i > 2:
892
+ candidates.append(f"{parent_package}.{pattern}.{package_parts[-1]}")
893
+
894
+ # 6. 特殊情况:如果当前是v1包,也尝试其他版本
895
+ if 'v1' in package_parts:
896
+ for i, part in enumerate(package_parts):
897
+ if part == 'v1':
898
+ # 尝试v2, v3等
899
+ for version in ['v2', 'v3', 'v4']:
900
+ version_package = package_parts.copy()
901
+ version_package[i] = version
902
+ candidates.append('.'.join(version_package))
903
+
518
904
  # 去重并保持顺序
519
905
  seen = set()
520
906
  unique_candidates = []
@@ -544,6 +930,11 @@ class ProtoReconstructor:
544
930
  self.logger.info(f" 🔍 源码分析: {type_name} -> {actual_type}")
545
931
  return actual_type
546
932
 
933
+ # 预检查:如果是基础字段名,可能不需要创建单独的类
934
+ if self._is_basic_field_type(type_name, current_class):
935
+ self.logger.info(f" 🔍 基础字段类型检测: {type_name} -> 跳过类匹配")
936
+ return None
937
+
547
938
  # 如果源码分析失败,回退到模糊匹配
548
939
  matching_classes = []
549
940
 
@@ -557,7 +948,11 @@ class ProtoReconstructor:
557
948
  if package_parts:
558
949
  package_name = '.'.join(package_parts)
559
950
  full_class_name = f"{package_name}.{file_name}"
560
- matching_classes.append((full_class_name, self._calculate_package_similarity(package_name, current_package)))
951
+
952
+ # 添加包名过滤,避免匹配到无关的第三方库
953
+ if self._is_valid_package_for_matching(package_name, current_package):
954
+ similarity = self._calculate_package_similarity(package_name, current_package)
955
+ matching_classes.append((full_class_name, similarity))
561
956
 
562
957
  if not matching_classes:
563
958
  return None
@@ -569,6 +964,114 @@ class ProtoReconstructor:
569
964
  self.logger.info(f" 🔍 智能匹配: {type_name} -> {best_match}")
570
965
  return best_match
571
966
 
967
+ def _is_basic_field_type(self, type_name: str, current_class: str = None) -> bool:
968
+ """
969
+ 检查是否为基础字段类型,避免为简单字段创建不必要的类
970
+
971
+ Args:
972
+ type_name: 类型名
973
+ current_class: 当前类名
974
+
975
+ Returns:
976
+ 是否为基础字段类型
977
+ """
978
+ # 首先检查是否为Java基础类型
979
+ basic_java_types = {
980
+ 'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
981
+ 'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
982
+ 'Byte', 'Short', 'Character'
983
+ }
984
+
985
+ if type_name in basic_java_types:
986
+ return True
987
+
988
+ # 常见的基础字段名模式
989
+ basic_patterns = [
990
+ 'tags', # tags_ 字段通常是 repeated string
991
+ 'ids', # ids_ 字段通常是 repeated string 或 repeated int64
992
+ 'values', # values_ 字段通常是基础类型数组
993
+ 'names', # names_ 字段通常是 repeated string
994
+ 'urls', # urls_ 字段通常是 repeated string
995
+ 'emails', # emails_ 字段通常是 repeated string
996
+ ]
997
+
998
+ type_lower = type_name.lower()
999
+
1000
+ # 检查是否匹配基础模式
1001
+ if type_lower in basic_patterns:
1002
+ return True
1003
+
1004
+ # 如果有当前类,尝试从Java源码中验证
1005
+ if current_class:
1006
+ try:
1007
+ java_file = self._find_java_file(current_class)
1008
+ if java_file:
1009
+ content = java_file.read_text(encoding='utf-8')
1010
+
1011
+ # 查找对应的字段声明,检查是否为基础类型
1012
+ field_name_pattern = type_lower.rstrip('s') + 's?_' # tags -> tags?_
1013
+ import re
1014
+
1015
+ # 查找字段声明:private List<String> tags_; 或 private Internal.ProtobufList<String> tags_;
1016
+ patterns = [
1017
+ rf'private\s+(?:Internal\.)?ProtobufList<String>\s+{field_name_pattern}',
1018
+ rf'private\s+List<String>\s+{field_name_pattern}',
1019
+ rf'private\s+(?:Internal\.)?ProtobufList<Integer>\s+{field_name_pattern}',
1020
+ rf'private\s+List<Integer>\s+{field_name_pattern}',
1021
+ rf'private\s+(?:Internal\.)?ProtobufList<Long>\s+{field_name_pattern}',
1022
+ rf'private\s+List<Long>\s+{field_name_pattern}',
1023
+ ]
1024
+
1025
+ for pattern in patterns:
1026
+ if re.search(pattern, content, re.IGNORECASE):
1027
+ return True
1028
+
1029
+ except Exception as e:
1030
+ self.logger.debug(f" 检查基础字段类型时出错: {e}")
1031
+
1032
+ return False
1033
+
1034
+ def _is_valid_package_for_matching(self, candidate_package: str, current_package: str) -> bool:
1035
+ """
1036
+ 检查候选包名是否适合用于匹配
1037
+
1038
+ Args:
1039
+ candidate_package: 候选包名
1040
+ current_package: 当前包名
1041
+
1042
+ Returns:
1043
+ 是否为有效的匹配候选
1044
+ """
1045
+ # 获取当前包的根包名(通常是前两部分,如 com.truecaller)
1046
+ current_parts = current_package.split('.')
1047
+ if len(current_parts) >= 2:
1048
+ current_root = '.'.join(current_parts[:2])
1049
+ else:
1050
+ current_root = current_package
1051
+
1052
+ # 过滤规则
1053
+ filters = [
1054
+ # 1. 排除明显的第三方库
1055
+ lambda pkg: 'unity3d' not in pkg.lower(),
1056
+ lambda pkg: 'facebook' not in pkg.lower(),
1057
+ lambda pkg: 'google' not in pkg.lower() or pkg.startswith(current_root),
1058
+ lambda pkg: 'android' not in pkg.lower() or pkg.startswith(current_root),
1059
+ lambda pkg: 'androidx' not in pkg.lower(),
1060
+ lambda pkg: 'kotlin' not in pkg.lower(),
1061
+ lambda pkg: 'java' not in pkg.lower(),
1062
+ lambda pkg: 'javax' not in pkg.lower(),
1063
+
1064
+ # 2. 优先选择同根包的类
1065
+ lambda pkg: pkg.startswith(current_root) or self._calculate_package_similarity(pkg, current_package) > 0.3
1066
+ ]
1067
+
1068
+ # 应用所有过滤规则
1069
+ for filter_func in filters:
1070
+ if not filter_func(candidate_package):
1071
+ return False
1072
+
1073
+ return True
1074
+
572
1075
  def _calculate_package_similarity(self, package1: str, package2: str) -> float:
573
1076
  """
574
1077
  计算两个包名的相似度
@@ -606,6 +1109,17 @@ class ProtoReconstructor:
606
1109
  Returns:
607
1110
  实际的完整类型名
608
1111
  """
1112
+ # 首先检查是否为基础类型,如果是则直接跳过
1113
+ basic_types = {
1114
+ 'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
1115
+ 'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
1116
+ 'Byte', 'Short', 'Character'
1117
+ }
1118
+
1119
+ if inferred_type in basic_types:
1120
+ self.logger.debug(f" 跳过基础类型: {inferred_type}")
1121
+ return None
1122
+
609
1123
  try:
610
1124
  java_file = self._find_java_file(class_name)
611
1125
  if not java_file:
@@ -626,6 +1140,11 @@ class ProtoReconstructor:
626
1140
  # 取第一个匹配的类型
627
1141
  actual_type_simple = matches[0]
628
1142
 
1143
+ # 再次检查匹配的类型是否为基础类型
1144
+ if actual_type_simple in basic_types:
1145
+ self.logger.debug(f" 匹配到基础类型,跳过: {actual_type_simple}")
1146
+ return None
1147
+
629
1148
  # 检查是否有import语句
630
1149
  import_pattern = rf'import\s+([^;]*\.{re.escape(actual_type_simple)});'
631
1150
  import_matches = re.findall(import_pattern, content)