reproto 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. .git/COMMIT_EDITMSG +30 -1
  2. .git/index +0 -0
  3. .git/logs/HEAD +1 -0
  4. .git/logs/refs/heads/iyue +1 -0
  5. .git/logs/refs/remotes/gitlab/iyue +1 -0
  6. .git/logs/refs/remotes/origin/iyue +1 -0
  7. .git/objects/03/3fab23ccc74b707a31f6f37dc5df42c100aac0 +0 -0
  8. .git/objects/07/b71df33935c14ddf99e3480c04eca4685028bc +0 -0
  9. .git/objects/0b/e20ea749c6ae1075ae98f9426e00b0a4a235af +0 -0
  10. .git/objects/13/5f50fea51c72f977f48d06a7584aba0f61e260 +0 -0
  11. .git/objects/1b/97584ebada3e93d7a2cfa4a1aae0b79c414d20 +0 -0
  12. .git/objects/1b/f0c1ad9586578e8332d061f7648dcb041ec063 +0 -0
  13. .git/objects/1e/46816d16e7c9db7575f1964403c3daa105be5c +0 -0
  14. .git/objects/27/24208ca2442e8ab9b453d0eb230fa41243b270 +0 -0
  15. .git/objects/2d/2b812ca27c477f9e1b2c2706a5eb795ffcf8eb +0 -0
  16. .git/objects/3f/b8830f516342a0ae1cb7c34b65016827cb9570 +4 -0
  17. .git/objects/41/f40c22247de377be99e30784229f3f128508a2 +0 -0
  18. .git/objects/48/cb11e75518a53be14146018214110986fade67 +0 -0
  19. .git/objects/4f/c6dc41f9c0a1a8e0eedd3ba49c43d78d0dbaba +0 -0
  20. .git/objects/59/4c23f158ccbd0a4288f9ea046d06160195afbf +0 -0
  21. .git/objects/5b/d4d87753b79e9157817f0c2e6964a731052854 +0 -0
  22. .git/objects/65/0189fe083bd711e45d463b229a72be619abad2 +0 -0
  23. .git/objects/66/663db35bfec8ef5f1a5b1c840fde1bb62a0eb8 +0 -0
  24. .git/objects/8a/0ed0ed8886fbc823e8d2258fa6d18699e94e25 +0 -0
  25. .git/objects/8d/d857b3d0ab3f5cd2e9173d532ef86e30df8eda +0 -0
  26. .git/objects/9e/23448ac58f907d9d123c32bdccedbb3d6741b5 +0 -0
  27. .git/objects/a0/d192999af7e2cbfa6a9ccd04d720a04e5a06d5 +0 -0
  28. .git/objects/a9/cc7923c34a4c97c5711d6309672f41d46c612a +0 -0
  29. .git/objects/af/c9cc15629847447063e86a82b8b56abb4fc08f +0 -0
  30. .git/objects/b1/db1c131cf32916028342c0037ce8eb57a8eb26 +0 -0
  31. .git/objects/b2/8334b94392b8af397a05ed702690fa6c9ab1ca +0 -0
  32. .git/objects/b8/7c89dcfce9e244ff5ef6a4bd394de12e8c8092 +0 -0
  33. .git/objects/d9/3bd435c8c7ad4efb83dff04d5450fabb9e3faf +0 -0
  34. .git/objects/e3/27755808d88c7ae5c06c229cf18bd0519646df +0 -0
  35. .git/objects/e5/83e7c40be934d16a1fa2e973487b395d930f42 +0 -0
  36. .git/objects/ed/1ae867d5e63195845afc58d88c38ecbdea97df +0 -0
  37. .git/objects/ef/f44e5099da27f7fb1ef14bb34902ccf4250b89 +0 -0
  38. .git/objects/f5/1be495b96272fa2e47f30071aed35ac1f0dd2c +0 -0
  39. .git/objects/fd/0bc07dc3c95e6168ab6d367d9eca139ac1e539 +0 -0
  40. .git/refs/heads/iyue +1 -1
  41. .git/refs/remotes/gitlab/iyue +1 -1
  42. .git/refs/remotes/origin/iyue +1 -1
  43. .gitignore +2 -1
  44. core/info_decoder.py +507 -6
  45. core/reconstructor.py +135 -6
  46. generation/proto_generator.py +51 -11
  47. include/google/protobuf/any.proto +162 -0
  48. include/google/protobuf/api.proto +207 -0
  49. include/google/protobuf/compiler/plugin.proto +180 -0
  50. include/google/protobuf/cpp_features.proto +67 -0
  51. include/google/protobuf/descriptor.proto +1417 -0
  52. include/google/protobuf/duration.proto +115 -0
  53. include/google/protobuf/empty.proto +51 -0
  54. include/google/protobuf/field_mask.proto +245 -0
  55. include/google/protobuf/go_features.proto +80 -0
  56. include/google/protobuf/java_features.proto +130 -0
  57. include/google/protobuf/source_context.proto +48 -0
  58. include/google/protobuf/struct.proto +95 -0
  59. include/google/protobuf/timestamp.proto +144 -0
  60. include/google/protobuf/type.proto +193 -0
  61. include/google/protobuf/wrappers.proto +157 -0
  62. parsing/java_parser.py +295 -12
  63. pyproject.toml +1 -1
  64. {reproto-0.1.1.dist-info → reproto-0.1.2.dist-info}/METADATA +1 -1
  65. {reproto-0.1.1.dist-info → reproto-0.1.2.dist-info}/RECORD +69 -20
  66. utils/builtin_proto.py +262 -0
  67. utils/type_utils.py +39 -6
  68. {reproto-0.1.1.dist-info → reproto-0.1.2.dist-info}/WHEEL +0 -0
  69. {reproto-0.1.1.dist-info → reproto-0.1.2.dist-info}/entry_points.txt +0 -0
core/info_decoder.py CHANGED
@@ -93,12 +93,19 @@ class InfoDecoder:
93
93
  # 创建消息定义
94
94
  message_def = self._create_message_definition(class_name)
95
95
 
96
+ # 存储当前处理的类名,供依赖推断使用
97
+ self._current_processing_class = class_name
98
+
96
99
  # 提取字段标签(如果有Java文件路径)
97
100
  field_tags = None
98
101
  if java_file_path:
102
+ # 存储当前Java文件路径,供其他方法使用
103
+ self._current_java_file_path = java_file_path
99
104
  field_tags = self.java_parser.extract_field_tags(java_file_path)
100
105
  if field_tags:
101
106
  self.logger.info(f" 🏷️ 从Java源码提取到 {len(field_tags)} 个字段标签")
107
+ else:
108
+ self._current_java_file_path = None
102
109
 
103
110
  # 解析字段信息
104
111
  self._parse_fields(message_def, bytes_data, objects, field_tags)
@@ -177,9 +184,14 @@ class InfoDecoder:
177
184
  # 检查是否包含oneof字段(通过查找'<'字符,ord=60)
178
185
  oneof_positions = [i for i, byte_val in enumerate(bytes_data) if byte_val == 60]
179
186
 
187
+ self.logger.info(f" 🔍 字节码长度: {len(bytes_data)}, oneof_positions: {oneof_positions}")
188
+ self.logger.info(f" 🔍 字节码内容: {[f'{b:02x}' for b in bytes_data[:20]]}...")
189
+
180
190
  if oneof_positions:
191
+ self.logger.info(f" 🎯 检测到oneof结构,调用_parse_oneof_fields")
181
192
  self._parse_oneof_fields(message_def, bytes_data, objects, oneof_positions)
182
193
  else:
194
+ self.logger.info(f" 🎯 未检测到oneof结构,调用_parse_regular_fields")
183
195
  self._parse_regular_fields(message_def, bytes_data, objects, field_tags)
184
196
 
185
197
  except Exception as e:
@@ -204,19 +216,20 @@ class InfoDecoder:
204
216
  self.logger.info(f" 📊 Objects数组: {objects}")
205
217
 
206
218
  # 如果有字段标签,优先使用Java源码信息
219
+ self.logger.info(f" 🔍 field_tags类型: {type(field_tags)}, 值: {field_tags}, 布尔值: {bool(field_tags)}")
207
220
  if field_tags:
208
221
  self.logger.info(f" 🏷️ 使用Java源码字段标签: {field_tags}")
209
222
  self._parse_fields_with_java_tags(message_def, bytes_data, objects, field_tags)
210
223
  else:
211
224
  # 回退到字节码解析
212
- self.logger.info(f" 🔍 回退到字节码解析")
225
+ self.logger.info(f" 🔍 回退到字节码解析,field_tags为: {field_tags}")
213
226
  self._parse_fields_from_bytecode(message_def, bytes_data, objects, field_start)
214
227
 
215
228
  self.logger.info(f" 📊 字段解析完成,共解析 {len(message_def.fields)} 个字段")
216
229
 
217
230
  def _parse_fields_with_java_tags(self, message_def: MessageDefinition, bytes_data: List[int], objects: List[str], field_tags: dict) -> None:
218
231
  """
219
- 使用Java源码提取的字段标签解析字段
232
+ 使用Java源码提取的字段标签解析字段,同时处理objects数组中的类引用
220
233
 
221
234
  Args:
222
235
  message_def: 消息定义对象
@@ -224,7 +237,48 @@ class InfoDecoder:
224
237
  objects: 对象数组
225
238
  field_tags: Java源码提取的字段标签映射
226
239
  """
240
+ self.logger.info(f" 🔍 开始_parse_fields_with_java_tags")
241
+ self.logger.info(f" 📊 Objects数组: {objects}")
242
+ self.logger.info(f" 📊 字段标签: {field_tags}")
243
+
244
+ # 首先检查是否有oneof结构
245
+ oneof_field = None
246
+ class_refs = []
247
+
248
+ for obj in objects:
249
+ if not obj.endswith('_') and obj not in ['action', 'actionCase', 'result', 'resultCase'] and len(obj) > 2:
250
+ class_refs.append(obj)
251
+ elif obj.endswith('_') and obj.rstrip('_') + 'Case_' in objects:
252
+ oneof_field = obj
253
+
254
+ # 分离普通字段和oneof相关的对象
255
+ oneof_related_objects = set()
256
+ if oneof_field and class_refs:
257
+ # 标记oneof相关的对象
258
+ oneof_related_objects.add(oneof_field) # action_
259
+ oneof_related_objects.add(oneof_field.rstrip('_') + 'Case_') # actionCase_
260
+ oneof_related_objects.update(class_refs) # SkipRecovery, InstallationInfo
261
+ self.logger.info(f" 🎯 检测到oneof结构: {oneof_field},包含类引用: {class_refs}")
262
+
263
+ # 特殊处理:如果oneof字段的名称与field_tags中的某些字段名相似,
264
+ # 说明这些field_tags可能是错误的(来自Java常量的错误转换)
265
+ # 例如:result_ oneof 但 field_tags 中有 singlesearchresult_, bulksearChresult_
266
+ oneof_base_name = oneof_field.rstrip('_').lower()
267
+ for field_name in list(field_tags.keys()):
268
+ field_base_name = field_name.rstrip('_').lower()
269
+ # 如果字段名包含oneof的基础名称,或者包含类引用的名称,很可能是错误的字段标签
270
+ if (oneof_base_name in field_base_name or
271
+ any(class_ref.lower() in field_base_name for class_ref in class_refs)):
272
+ oneof_related_objects.add(field_name)
273
+ self.logger.debug(f" 🔍 标记疑似错误字段标签: {field_name} (与oneof {oneof_field} 或类引用相关)")
274
+
275
+ # 处理普通字段(从field_tags中提取,排除oneof相关的字段)
227
276
  for field_name_raw, field_tag in field_tags.items():
277
+ # 跳过oneof相关的字段
278
+ if field_name_raw in oneof_related_objects:
279
+ self.logger.debug(f" ⏭️ 跳过oneof相关字段: {field_name_raw}")
280
+ continue
281
+
228
282
  # 清理字段名
229
283
  field_name = self._clean_field_name(field_name_raw)
230
284
 
@@ -297,9 +351,6 @@ class InfoDecoder:
297
351
  # 特殊情况处理:根据字段名修正类型
298
352
  field_type_name = self._refine_field_type(field_name, field_type_name, 0) # 使用0作为占位符
299
353
 
300
- # 确定字段规则(基于Java类型判断是否为repeated)
301
- # 已经在上面确定了rule,这里不需要重复处理
302
-
303
354
  # 创建字段定义
304
355
  field_def = FieldDefinition(
305
356
  name=field_name,
@@ -310,6 +361,418 @@ class InfoDecoder:
310
361
 
311
362
  message_def.fields.append(field_def)
312
363
  self.logger.info(f" ✅ 添加字段: {field_name} = {field_tag} ({rule} {field_type_name})")
364
+
365
+ # 最后处理objects数组中的类引用,检测oneof结构
366
+ self._parse_oneof_from_objects(message_def, objects, field_tags)
367
+
368
+ def _parse_oneof_from_objects(self, message_def: MessageDefinition, objects: List[str], field_tags: dict) -> None:
369
+ """
370
+ 从objects数组中解析oneof结构和类引用
371
+
372
+ Args:
373
+ message_def: 消息定义对象
374
+ objects: 对象数组
375
+ field_tags: 已知的字段标签映射
376
+ """
377
+ # 查找类引用(以.class结尾的对象)
378
+ class_refs = []
379
+ oneof_field = None
380
+
381
+ # 首先识别已经作为字段类型的类引用,避免重复处理
382
+ # 通过Java源码分析结果来识别已使用的类引用
383
+ used_class_refs = set()
384
+
385
+ # 从已解析的字段中提取使用的类引用
386
+ for field in message_def.fields:
387
+ # 如果字段类型不是基础类型,就是类引用
388
+ if (field.type_name not in ['string', 'int32', 'int64', 'long', 'int', 'bool', 'double', 'float', 'bytes'] and
389
+ not field.type_name.startswith('google.protobuf.') and
390
+ not field.type_name.startswith('repeated ') and
391
+ not field.type_name.startswith('map<')):
392
+
393
+ # 提取类名(去掉包名部分)
394
+ class_name = field.type_name.split('.')[-1]
395
+ used_class_refs.add(class_name)
396
+ self.logger.debug(f" 📝 从已解析字段 {field.name} 中识别类引用: {class_name}")
397
+
398
+ # 识别连续的类引用(oneof选项)
399
+ consecutive_class_refs = []
400
+ for i, obj in enumerate(objects):
401
+ if not obj.endswith('_') and obj not in ['action', 'actionCase', 'result', 'resultCase'] and len(obj) > 2:
402
+ consecutive_class_refs.append(i)
403
+
404
+ # 如果有多个连续的类引用,它们很可能是oneof选项
405
+ is_oneof_group = len(consecutive_class_refs) > 1
406
+ if is_oneof_group:
407
+ # 检查是否连续
408
+ for i in range(len(consecutive_class_refs) - 1):
409
+ if consecutive_class_refs[i+1] - consecutive_class_refs[i] == 1:
410
+ # 连续的类引用,很可能是oneof选项
411
+ self.logger.debug(f" 🔍 检测到连续类引用,可能是oneof选项: {[objects[idx] for idx in consecutive_class_refs]}")
412
+ break
413
+
414
+ for i, obj in enumerate(objects):
415
+ # 检查是否是类引用(不以_结尾且不是基础字段名)
416
+ if not obj.endswith('_') and obj not in ['action', 'actionCase'] and len(obj) > 2:
417
+ # 跳过已经作为字段类型的类引用
418
+ if obj in used_class_refs:
419
+ self.logger.debug(f" ⏭️ 跳过已用作字段类型的类引用: {obj}")
420
+ continue
421
+ # 这可能是一个独立的类引用(用于oneof)
422
+ class_refs.append((i, obj))
423
+ self.logger.info(f" 🔍 发现独立类引用: {obj}")
424
+ elif obj.endswith('_') and obj.rstrip('_') + 'Case_' in objects:
425
+ # 发现oneof字段(通过检查是否有对应的Case字段)
426
+ oneof_field = obj
427
+ self.logger.info(f" 🔍 发现oneof字段: {obj}")
428
+
429
+ if class_refs and oneof_field:
430
+ # 这是一个oneof结构
431
+ self._create_oneof_structure(message_def, oneof_field, class_refs, field_tags)
432
+ elif class_refs:
433
+ # 有类引用但没有明确的oneof字段,可能是直接的消息字段
434
+ self._create_message_fields_from_class_refs(message_def, class_refs, field_tags)
435
+
436
def _create_oneof_structure(self, message_def: MessageDefinition, oneof_field: str, class_refs: List[tuple], field_tags: dict) -> None:
    """
    Build a oneof definition from a list of class references.

    One optional field is created per class reference for which a tag can
    be resolved; references without a tag are skipped with a warning. The
    oneof is appended to ``message_def.oneofs`` only if it ends up with at
    least one field. Every resolved class is also recorded as a dependency.

    Args:
        message_def: Message definition being populated.
        oneof_field: Raw oneof field name (e.g. "action_").
        class_refs: List of ``(index, class_name)`` tuples.
        field_tags: Field-name -> tag mapping.
    """
    from models.message_definition import OneofDefinition

    oneof_name = self._clean_field_name(oneof_field)
    oneof_def = OneofDefinition(name=oneof_name)

    # Tags already taken by regular fields and by previously built oneofs.
    used_tags = {field.tag for field in message_def.fields}
    for oneof in message_def.oneofs:
        used_tags.update(field.tag for field in oneof.fields)

    for _, class_name in class_refs:
        field_tag = self._find_tag_for_class(class_name, field_tags, used_tags)
        if field_tag is None:
            self.logger.warning(f" ⚠️ 无法找到类 {class_name} 的字段标签")
            continue

        # Reserve the tag immediately so a later class reference cannot
        # fuzzy-match the same entry and yield a duplicate field number.
        used_tags.add(field_tag)

        # e.g. SkipRecovery -> skip_recovery
        field_name = self._class_name_to_field_name(class_name)

        # Inner classes (containing '$') use "Outer$Inner" flattened to
        # "OuterInner" as the type name; other classes use their own name.
        full_class_name = self._infer_full_dependency_class_name(class_name)
        if '$' in full_class_name:
            clean_class_name = full_class_name.split('.')[-1].replace('$', '')
        else:
            clean_class_name = class_name.replace('$', '')

        field_def = FieldDefinition(
            name=field_name,
            type_name=clean_class_name,
            tag=field_tag,
            rule='optional'
        )

        # Keep the fully qualified name for import-path generation.
        field_def.full_class_name = full_class_name

        oneof_def.fields.append(field_def)
        self.logger.info(f" ✅ 添加oneof字段: {field_name} = {field_tag} ({clean_class_name})")

        self._record_dependency_class(class_name)

    if oneof_def.fields:
        message_def.oneofs.append(oneof_def)
        self.logger.info(f" 🎯 创建oneof: {oneof_name} (包含 {len(oneof_def.fields)} 个字段)")
502
+
503
def _create_message_fields_from_class_refs(self, message_def: MessageDefinition, class_refs: List[tuple], field_tags: dict) -> None:
    """
    Create plain (non-oneof) message fields from class references.

    One optional field is appended to ``message_def.fields`` per class
    reference for which a tag can be resolved; references without a tag
    are skipped with a warning. Every resolved class is also recorded as a
    dependency.

    Args:
        message_def: Message definition being populated.
        class_refs: List of ``(index, class_name)`` tuples.
        field_tags: Field-name -> tag mapping.
    """
    # Tags already taken by regular fields and by oneof members.
    used_tags = {field.tag for field in message_def.fields}
    for oneof in message_def.oneofs:
        used_tags.update(field.tag for field in oneof.fields)

    for _, class_name in class_refs:
        field_tag = self._find_tag_for_class(class_name, field_tags, used_tags)
        if field_tag is None:
            self.logger.warning(f" ⚠️ 无法找到类 {class_name} 的字段标签")
            continue

        # Reserve the tag immediately so a later class reference cannot
        # fuzzy-match the same entry and yield a duplicate field number.
        used_tags.add(field_tag)

        field_name = self._class_name_to_field_name(class_name)

        # Inner classes (containing '$') use "Outer$Inner" flattened to
        # "OuterInner" as the type name; other classes use their own name.
        full_class_name = self._infer_full_dependency_class_name(class_name)
        if '$' in full_class_name:
            clean_class_name = full_class_name.split('.')[-1].replace('$', '')
        else:
            clean_class_name = class_name.replace('$', '')

        field_def = FieldDefinition(
            name=field_name,
            type_name=clean_class_name,
            tag=field_tag,
            rule='optional'
        )

        # Keep the fully qualified name for import-path generation.
        field_def.full_class_name = full_class_name

        message_def.fields.append(field_def)
        self.logger.info(f" ✅ 添加消息字段: {field_name} = {field_tag} ({clean_class_name})")

        self._record_dependency_class(class_name)
557
+
558
+ def _find_tag_for_class(self, class_name: str, field_tags: dict, used_tags: set = None) -> Optional[int]:
559
+ """
560
+ 为类名查找对应的字段标签,完全基于Java源码分析
561
+
562
+ Args:
563
+ class_name: 类名(如"SkipRecovery")
564
+ field_tags: 字段标签映射
565
+ used_tags: 已使用的字段标签集合
566
+
567
+ Returns:
568
+ 字段标签,如果找不到则返回None
569
+ """
570
+ if used_tags is None:
571
+ used_tags = set()
572
+ # 完全基于Java源码分析,智能推断字段标签
573
+
574
+ # 1. 直接匹配:类名转换为字段名
575
+ direct_field_name = self._to_snake_case(class_name) + '_'
576
+ if direct_field_name in field_tags:
577
+ self.logger.debug(f" 🎯 直接匹配类 {class_name}: {direct_field_name} -> {field_tags[direct_field_name]}")
578
+ return field_tags[direct_field_name]
579
+
580
+ # 2. 小写匹配
581
+ lowercase_field_name = class_name.lower() + '_'
582
+ if lowercase_field_name in field_tags:
583
+ self.logger.debug(f" 🎯 小写匹配类 {class_name}: {lowercase_field_name} -> {field_tags[lowercase_field_name]}")
584
+ return field_tags[lowercase_field_name]
585
+
586
+ # 3. 智能模式匹配:处理各种命名约定
587
+ # 移除常见后缀并尝试匹配
588
+ class_variants = [class_name]
589
+ if class_name.endswith('Result'):
590
+ class_variants.append(class_name[:-6]) # 移除Result
591
+ if class_name.endswith('Info'):
592
+ class_variants.append(class_name[:-4]) # 移除Info
593
+ if class_name.endswith('Data'):
594
+ class_variants.append(class_name[:-4]) # 移除Data
595
+
596
+ for variant in class_variants:
597
+ for suffix in ['_', 'result_', 'info_', 'data_']:
598
+ test_field_name = variant.lower() + suffix
599
+ if test_field_name in field_tags:
600
+ self.logger.debug(f" 🎯 变体匹配类 {class_name}: {test_field_name} -> {field_tags[test_field_name]}")
601
+ return field_tags[test_field_name]
602
+
603
+ # 4. 模糊匹配:在字段名中查找类名
604
+ class_lower = class_name.lower()
605
+ for field_name, tag in field_tags.items():
606
+ # 跳过已使用的标签
607
+ if tag in used_tags:
608
+ self.logger.debug(f" ⏭️ 跳过已使用的标签: {field_name} -> {tag}")
609
+ continue
610
+
611
+ field_clean = field_name.lower().rstrip('_')
612
+ if class_lower == field_clean or class_lower in field_clean:
613
+ self.logger.debug(f" 🎯 模糊匹配类 {class_name}: {field_name} -> {tag}")
614
+ return tag
615
+
616
+ # 5. 使用Java源码分析器获取更精确的信息
617
+ if self.java_source_analyzer:
618
+ tag = self._get_class_field_tag_from_source(class_name)
619
+ if tag is not None:
620
+ self.logger.debug(f" 🎯 源码分析匹配类 {class_name}: -> {tag}")
621
+ return tag
622
+
623
+ return None
624
+
625
+ def _get_class_field_tag_from_source(self, class_name: str) -> Optional[int]:
626
+ """
627
+ 从Java源码中获取类对应的字段标签
628
+
629
+ Args:
630
+ class_name: 类名
631
+
632
+ Returns:
633
+ 字段标签,如果找不到则返回None
634
+ """
635
+ if not self.java_source_analyzer:
636
+ return None
637
+
638
+ try:
639
+ # 尝试通过Java源码分析器获取字段标签
640
+ # 查找形如 CLASSNAME_FIELD_NUMBER 的常量
641
+ possible_constant_names = [
642
+ f"{class_name.upper()}_FIELD_NUMBER",
643
+ f"{self._to_snake_case(class_name).upper()}_FIELD_NUMBER",
644
+ f"{class_name.upper()}",
645
+ f"{class_name.upper()}_NUMBER",
646
+ # 处理缩写情况,如 SkipRecovery -> SKIP_FIELD_NUMBER
647
+ f"{class_name.upper()[:4]}_FIELD_NUMBER", # 前4个字符
648
+ f"{class_name.upper()[:5]}_FIELD_NUMBER", # 前5个字符
649
+ f"{class_name.upper()[:6]}_FIELD_NUMBER", # 前6个字符
650
+ # 处理常见的缩写模式
651
+ f"{class_name.replace('Recovery', '').upper()}_FIELD_NUMBER", # 移除Recovery
652
+ f"{class_name.replace('Info', '').upper()}_FIELD_NUMBER", # 移除Info
653
+ f"{class_name.replace('Data', '').upper()}_FIELD_NUMBER", # 移除Data
654
+ f"{class_name.replace('Result', '').upper()}_FIELD_NUMBER", # 移除Result
655
+ ]
656
+
657
+ for constant_name in possible_constant_names:
658
+ # 尝试从Java源码中提取常量值
659
+ tag = self.java_source_analyzer._extract_constant_value(constant_name)
660
+ if tag is not None:
661
+ self.logger.debug(f" 🎯 从源码获取字段标签: {class_name} -> {constant_name} = {tag}")
662
+ return tag
663
+
664
+ return None
665
+
666
+ except Exception as e:
667
+ self.logger.debug(f" ⚠️ 源码分析失败: {e}")
668
+ return None
669
+
670
+ def _to_snake_case(self, camel_str: str) -> str:
671
+ """
672
+ 将驼峰命名转换为蛇形命名
673
+
674
+ Args:
675
+ camel_str: 驼峰命名字符串
676
+
677
+ Returns:
678
+ 蛇形命名字符串
679
+ """
680
+ # 处理$符号
681
+ camel_str = camel_str.replace('$', '_')
682
+
683
+ # 在大写字母前插入下划线
684
+ result = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', camel_str)
685
+
686
+ # 转换为小写
687
+ result = result.lower()
688
+
689
+ # 清理连续的下划线
690
+ result = re.sub(r'_+', '_', result)
691
+
692
+ # 移除首尾下划线
693
+ return result.strip('_')
694
+
695
+ def _class_name_to_field_name(self, class_name: str) -> str:
696
+ """
697
+ 将类名转换为字段名
698
+
699
+ Args:
700
+ class_name: 类名(如"SkipRecovery")
701
+
702
+ Returns:
703
+ 字段名(如"skip_recovery")
704
+ """
705
+ # 移除$符号并转换为snake_case
706
+ clean_name = class_name.replace('$', '')
707
+ return self._to_snake_case(clean_name)
708
+
709
+ def _record_dependency_class(self, class_name: str) -> None:
710
+ """
711
+ 记录依赖类,用于后续处理
712
+
713
+ Args:
714
+ class_name: 类名
715
+ """
716
+ # 记录依赖类到实例变量中,供重构器获取
717
+ if not hasattr(self, 'discovered_dependencies'):
718
+ self.discovered_dependencies = []
719
+
720
+ # 构造完整的类名,智能处理内部类情况
721
+ full_class_name = self._infer_full_dependency_class_name(class_name)
722
+
723
+ if full_class_name not in self.discovered_dependencies:
724
+ self.discovered_dependencies.append(full_class_name)
725
+ self.logger.info(f" 📦 记录依赖类: {full_class_name}")
726
+
727
+ def _infer_full_dependency_class_name(self, class_name: str) -> str:
728
+ """
729
+ 推断依赖类的完整类名,特别处理内部类情况
730
+
731
+ Args:
732
+ class_name: 简单类名(如SkipRecovery)
733
+
734
+ Returns:
735
+ 完整的类名
736
+ """
737
+ # 如果已经是完整类名,直接返回
738
+ if '.' in class_name:
739
+ return class_name
740
+
741
+ # 尝试从当前处理的类推断包名和外部类
742
+ current_class = getattr(self, '_current_processing_class', None)
743
+ if current_class and '$' in current_class:
744
+ # 当前类是内部类,依赖类可能是同一外部类的其他内部类
745
+ # 如:com.example.Service$CompleteRequest -> com.example.Service$SkipRecovery
746
+ parts = current_class.split('$')
747
+ if len(parts) >= 2:
748
+ outer_class = parts[0] # com.example.Service
749
+ full_class_name = f"{outer_class}${class_name}"
750
+ self.logger.debug(f" 🔍 推断内部类依赖: {class_name} -> {full_class_name}")
751
+ return full_class_name
752
+
753
+ # 如果当前类有包名,使用相同的包名
754
+ if current_class and '.' in current_class:
755
+ # 提取包名部分
756
+ last_dot = current_class.rfind('.')
757
+ if last_dot != -1:
758
+ package_name = current_class[:last_dot]
759
+ full_class_name = f"{package_name}.{class_name}"
760
+ self.logger.debug(f" 🔍 推断包级依赖: {class_name} -> {full_class_name}")
761
+ return full_class_name
762
+
763
+ # 最后的备选方案:使用默认包名
764
+ full_class_name = f"com.truecaller.accountonboarding.v1.{class_name}"
765
+ self.logger.debug(f" 🔍 使用默认包名: {class_name} -> {full_class_name}")
766
+ return full_class_name
767
+
768
def get_discovered_dependencies(self) -> List[str]:
    """
    Return the dependency classes discovered while parsing.

    Returns:
        The ``discovered_dependencies`` list of this instance, or a new
        empty list when nothing has been recorded yet.
    """
    try:
        return self.discovered_dependencies
    except AttributeError:
        return []
313
776
 
314
777
  def _determine_field_rule(self, field_type_byte: int, field_type_name: str = None, java_type: str = None) -> str:
315
778
  """
@@ -766,7 +1229,45 @@ class InfoDecoder:
766
1229
  def _parse_oneof_fields(self, message_def: MessageDefinition, bytes_data: List[int],
767
1230
  objects: List[str], oneof_positions: List[int]) -> None:
768
1231
  """
769
- 解析oneof字段
1232
+ 解析oneof字段(增强版,支持Java源码字段标签)
1233
+
1234
+ Args:
1235
+ message_def: 消息定义对象
1236
+ bytes_data: 字节数组
1237
+ objects: 对象数组
1238
+ oneof_positions: oneof标记位置列表
1239
+ """
1240
+ self.logger.info(f" 🎯 开始解析oneof字段")
1241
+ self.logger.info(f" 📊 Objects数组: {objects}")
1242
+ self.logger.info(f" 📊 oneof_positions: {oneof_positions}")
1243
+
1244
+ # 首先尝试从Java源码获取字段标签
1245
+ field_tags = None
1246
+ if hasattr(self, 'java_parser') and self.java_parser:
1247
+ try:
1248
+ # 获取当前类的Java文件路径
1249
+ java_file_path = getattr(self, '_current_java_file_path', None)
1250
+ if java_file_path:
1251
+ field_tags = self.java_parser.extract_field_tags(java_file_path)
1252
+ if field_tags:
1253
+ self.logger.info(f" 🏷️ 获取到字段标签: {field_tags}")
1254
+ except Exception as e:
1255
+ self.logger.debug(f" ⚠️ 获取字段标签失败: {e}")
1256
+
1257
+ # 如果有字段标签,使用新的解析逻辑
1258
+ if field_tags:
1259
+ self.logger.info(f" 🎯 使用Java源码字段标签解析oneof")
1260
+ # 先处理普通字段
1261
+ self._parse_fields_with_java_tags(message_def, bytes_data, objects, field_tags)
1262
+ else:
1263
+ # 回退到旧的字节码解析逻辑
1264
+ self.logger.info(f" 🎯 使用字节码解析oneof")
1265
+ self._parse_oneof_fields_legacy(message_def, bytes_data, objects, oneof_positions)
1266
+
1267
+ def _parse_oneof_fields_legacy(self, message_def: MessageDefinition, bytes_data: List[int],
1268
+ objects: List[str], oneof_positions: List[int]) -> None:
1269
+ """
1270
+ 传统的oneof字段解析方法(作为备用)
770
1271
 
771
1272
  Args:
772
1273
  message_def: 消息定义对象