reproto 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. .git/COMMIT_EDITMSG +29 -1
  2. .git/index +0 -0
  3. .git/logs/HEAD +4 -0
  4. .git/logs/refs/heads/iyue +4 -0
  5. .git/logs/refs/remotes/gitlab/iyue +4 -0
  6. .git/logs/refs/remotes/origin/iyue +4 -0
  7. .git/objects/00/81a7e1ec3251cc192ed2b73d5be897593fb872 +0 -0
  8. .git/objects/00/885cebf557ff261574deb93cb96449d06db01c +0 -0
  9. .git/objects/02/33f5b60263e5dc6d041c0b223a0724eb650faa +0 -0
  10. .git/objects/03/3fab23ccc74b707a31f6f37dc5df42c100aac0 +0 -0
  11. .git/objects/07/557fbd1d149ce51af8e98e8ca8590757a89dfa +0 -0
  12. .git/objects/07/b71df33935c14ddf99e3480c04eca4685028bc +0 -0
  13. .git/objects/0b/e20ea749c6ae1075ae98f9426e00b0a4a235af +0 -0
  14. .git/objects/12/f9cc4ad0b4f0af4f7bae379f281b2cebe7cc7f +0 -0
  15. .git/objects/13/5f50fea51c72f977f48d06a7584aba0f61e260 +0 -0
  16. .git/objects/1b/97584ebada3e93d7a2cfa4a1aae0b79c414d20 +0 -0
  17. .git/objects/1b/f0c1ad9586578e8332d061f7648dcb041ec063 +0 -0
  18. .git/objects/1e/46816d16e7c9db7575f1964403c3daa105be5c +0 -0
  19. .git/objects/24/d78e796570a8572a03bc1dd26608a7cfb506f8 +2 -0
  20. .git/objects/27/24208ca2442e8ab9b453d0eb230fa41243b270 +0 -0
  21. .git/objects/2a/fe93d8bcbeab9e136d8b6766604c32b3610314 +0 -0
  22. .git/objects/2d/2b812ca27c477f9e1b2c2706a5eb795ffcf8eb +0 -0
  23. .git/objects/2d/8a0da260a710010ae62be134ac1cea6ceecfd1 +0 -0
  24. .git/objects/2d/e8ecbb5ab5de1a032bef3f4606ce5fa7c6c4e8 +0 -0
  25. .git/objects/30/9347e5681d80bd3c7949882e27090dd9070d16 +0 -0
  26. .git/objects/33/633c9df669ff8cf38638717937a54990814268 +0 -0
  27. .git/objects/39/993e3600bf4ab82aa361b738ee97a108787450 +0 -0
  28. .git/objects/3b/ab663710fd6b43d9372313fced9043c4cb07dd +0 -0
  29. .git/objects/3e/b3f1273caf6814dfa69325ccbd9fd1340cf20a +0 -0
  30. .git/objects/3f/b8830f516342a0ae1cb7c34b65016827cb9570 +4 -0
  31. .git/objects/41/f40c22247de377be99e30784229f3f128508a2 +0 -0
  32. .git/objects/45/fbc774dedb61c7c205ea732f59a8dca8d13555 +3 -0
  33. .git/objects/48/cb11e75518a53be14146018214110986fade67 +0 -0
  34. .git/objects/4f/c6dc41f9c0a1a8e0eedd3ba49c43d78d0dbaba +0 -0
  35. .git/objects/59/4c23f158ccbd0a4288f9ea046d06160195afbf +0 -0
  36. .git/objects/5b/d4d87753b79e9157817f0c2e6964a731052854 +0 -0
  37. .git/objects/60/5cb6fd6a9f8894ad4d43a9b8e4785c1b3b0e17 +1 -0
  38. .git/objects/60/f61a0ea50091eac8d344c86597375cbdfc2785 +0 -0
  39. .git/objects/63/ddda2a403efaab3f4c6597b3a73a7b1147adb5 +0 -0
  40. .git/objects/65/0189fe083bd711e45d463b229a72be619abad2 +0 -0
  41. .git/objects/66/663db35bfec8ef5f1a5b1c840fde1bb62a0eb8 +0 -0
  42. .git/objects/66/6c5c5fc30435228116fa08c9d821bebaaa8926 +0 -0
  43. .git/objects/7c/ef0adfb28fd774bc78061c6f088e1ef9b050f6 +0 -0
  44. .git/objects/7d/dc129188a10c68ab756ef2cacb292c76920403 +0 -0
  45. .git/objects/80/17038e0f7818a44a742f77c86f4f88ed768fcd +0 -0
  46. .git/objects/87/c7db6c91c17a2df84b56d30bd24a0f6b9dbdd9 +0 -0
  47. .git/objects/8a/0ed0ed8886fbc823e8d2258fa6d18699e94e25 +0 -0
  48. .git/objects/8d/d857b3d0ab3f5cd2e9173d532ef86e30df8eda +0 -0
  49. .git/objects/92/984cf67b2c25d435468a8218daa26ba0466054 +0 -0
  50. .git/objects/93/140b54b1fb9116ee214afee8abf2c72a232487 +0 -0
  51. .git/objects/93/68f2c32f83054ab072b7c9686d8baa0bad7f12 +4 -0
  52. .git/objects/9a/5ad062be9f6e001f4237a598a08981aba731e6 +0 -0
  53. .git/objects/9e/23448ac58f907d9d123c32bdccedbb3d6741b5 +0 -0
  54. .git/objects/a0/d192999af7e2cbfa6a9ccd04d720a04e5a06d5 +0 -0
  55. .git/objects/a1/655e0cb323c300562f97dcc67d5a446908c8ec +0 -0
  56. .git/objects/a5/38cc82cef7c49500d3522220f0f60a9ebc1ae6 +0 -0
  57. .git/objects/a9/41063a7ce89c353fa24378ec7c3f12f08f9df8 +0 -0
  58. .git/objects/a9/cc7923c34a4c97c5711d6309672f41d46c612a +0 -0
  59. .git/objects/ac/5c983d949d8c928bb022badf801e45e75e785e +0 -0
  60. .git/objects/af/c9cc15629847447063e86a82b8b56abb4fc08f +0 -0
  61. .git/objects/b0/82ca2c1b5a03edff25da3c2b2b573d049877e9 +0 -0
  62. .git/objects/b1/db1c131cf32916028342c0037ce8eb57a8eb26 +0 -0
  63. .git/objects/b2/8334b94392b8af397a05ed702690fa6c9ab1ca +0 -0
  64. .git/objects/b8/7c89dcfce9e244ff5ef6a4bd394de12e8c8092 +0 -0
  65. .git/objects/bc/e98bdb71c8681acb460195fdcbbe5d36290976 +0 -0
  66. .git/objects/c1/87d5e047eca86cfd8d444be2987aaa3f62c4d6 +0 -0
  67. .git/objects/c4/c2da96b0bb8db2acb0e6615cf340c7e51af26b +0 -0
  68. .git/objects/c5/13a96e7584636b20b12280c029750d5bc3da1e +0 -0
  69. .git/objects/c7/c34283697bd3cce07db53953eda25ee7cc371e +0 -0
  70. .git/objects/c9/d60d922a04b87587cd67b0abf9fe5a7b7b76cd +0 -0
  71. .git/objects/d2/69b1676dbf32f76a7c405d0b4ea6a70ac3a626 +0 -0
  72. .git/objects/d3/5a918b1d9125ad35d60e08b181323df3246f1a +0 -0
  73. .git/objects/d8/eaf86669fbfd10497570c1784db1ed2696b588 +0 -0
  74. .git/objects/d9/3bd435c8c7ad4efb83dff04d5450fabb9e3faf +0 -0
  75. .git/objects/d9/90e6d553577d37ebce8b28b3015ecbde038b42 +0 -0
  76. .git/objects/da/13cc15bcd8ee39c81f36dee7f179a569ecab0b +0 -0
  77. .git/objects/e3/27755808d88c7ae5c06c229cf18bd0519646df +0 -0
  78. .git/objects/e4/4c1d8a90207ac082d8ab7ff0db66708e2ebc31 +0 -0
  79. .git/objects/e5/83e7c40be934d16a1fa2e973487b395d930f42 +0 -0
  80. .git/objects/ed/1ae867d5e63195845afc58d88c38ecbdea97df +0 -0
  81. .git/objects/ef/f44e5099da27f7fb1ef14bb34902ccf4250b89 +0 -0
  82. .git/objects/f5/1be495b96272fa2e47f30071aed35ac1f0dd2c +0 -0
  83. .git/objects/f8/ed595d25bd9d500e765a792c513878f7ddb1f7 +0 -0
  84. .git/objects/fd/0bc07dc3c95e6168ab6d367d9eca139ac1e539 +0 -0
  85. .git/refs/heads/iyue +1 -1
  86. .git/refs/remotes/gitlab/iyue +1 -1
  87. .git/refs/remotes/origin/iyue +1 -1
  88. .gitignore +2 -1
  89. README.md +104 -190
  90. core/__init__.py +23 -0
  91. core/info_decoder.py +520 -10
  92. core/reconstructor.py +159 -21
  93. generation/__init__.py +17 -0
  94. generation/proto_generator.py +62 -16
  95. include/google/protobuf/any.proto +162 -0
  96. include/google/protobuf/api.proto +207 -0
  97. include/google/protobuf/compiler/plugin.proto +180 -0
  98. include/google/protobuf/cpp_features.proto +67 -0
  99. include/google/protobuf/descriptor.proto +1417 -0
  100. include/google/protobuf/duration.proto +115 -0
  101. include/google/protobuf/empty.proto +51 -0
  102. include/google/protobuf/field_mask.proto +245 -0
  103. include/google/protobuf/go_features.proto +80 -0
  104. include/google/protobuf/java_features.proto +130 -0
  105. include/google/protobuf/source_context.proto +48 -0
  106. include/google/protobuf/struct.proto +95 -0
  107. include/google/protobuf/timestamp.proto +144 -0
  108. include/google/protobuf/type.proto +193 -0
  109. include/google/protobuf/wrappers.proto +157 -0
  110. main.py +53 -56
  111. models/__init__.py +31 -24
  112. parsing/__init__.py +22 -0
  113. parsing/enum_parser.py +10 -2
  114. parsing/java_parser.py +302 -13
  115. pyproject.toml +1 -1
  116. reproto-0.1.3.dist-info/METADATA +209 -0
  117. {reproto-0.1.1.dist-info → reproto-0.1.3.dist-info}/RECORD +125 -31
  118. utils/__init__.py +40 -0
  119. utils/builtin_proto.py +269 -0
  120. utils/file_cache.py +8 -1
  121. utils/report_utils.py +71 -0
  122. utils/type_index.py +8 -1
  123. utils/type_utils.py +39 -6
  124. core/bytecode_parser.py +0 -274
  125. reproto-0.1.1.dist-info/METADATA +0 -295
  126. {reproto-0.1.1.dist-info → reproto-0.1.3.dist-info}/WHEEL +0 -0
  127. {reproto-0.1.1.dist-info → reproto-0.1.3.dist-info}/entry_points.txt +0 -0
main.py CHANGED
@@ -16,17 +16,33 @@ Author: AI Assistant
16
16
 
17
17
  import sys
18
18
  import argparse
19
+ import traceback
19
20
  from pathlib import Path
20
21
 
21
- # 导入项目模块
22
+ # 确保项目根目录在Python路径中
22
23
  import os
23
24
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
24
- from core.reconstructor import ProtoReconstructor
25
- from utils.logger import setup_logger, get_logger
26
25
 
26
+ # 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
27
+ try:
28
+ # 相对导入(包环境)
29
+ from .core.reconstructor import ProtoReconstructor
30
+ from .utils.logger import setup_logger, get_logger
31
+ from .utils.report_utils import print_results_summary
32
+ except ImportError:
33
+ # 绝对导入(开发环境)
34
+ from core.reconstructor import ProtoReconstructor
35
+ from utils.logger import setup_logger, get_logger
36
+ from utils.report_utils import print_results_summary
27
37
 
28
- def parse_arguments():
29
- """解析命令行参数"""
38
+
39
+ def parse_arguments() -> argparse.Namespace:
40
+ """
41
+ 解析命令行参数
42
+
43
+ Returns:
44
+ 解析后的命令行参数对象
45
+ """
30
46
  parser = argparse.ArgumentParser(
31
47
  description='从JADX反编译的Java源码重构Protobuf .proto文件',
32
48
  formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -72,8 +88,19 @@ def parse_arguments():
72
88
  return parser.parse_args()
73
89
 
74
90
 
75
- def validate_arguments(args):
76
- """验证命令行参数"""
91
+ def validate_arguments(args: argparse.Namespace) -> tuple[Path, str, Path]:
92
+ """
93
+ 验证命令行参数的有效性
94
+
95
+ Args:
96
+ args: 解析后的命令行参数
97
+
98
+ Returns:
99
+ 验证后的路径元组: (sources_path, root_class, output_path)
100
+
101
+ Raises:
102
+ SystemExit: 当参数无效时退出程序
103
+ """
77
104
  logger = get_logger("main")
78
105
 
79
106
  # 验证源码目录
@@ -92,13 +119,13 @@ def validate_arguments(args):
92
119
  logger.error("应该是完整的类名,如: com.example.Model")
93
120
  sys.exit(1)
94
121
 
95
- # 输出目录可以不存在,会自动创建
122
+ # 验证输出目录
96
123
  output_path = Path(args.output_dir)
97
124
  if output_path.exists() and not output_path.is_dir():
98
125
  logger.error(f"输出路径存在但不是目录: {output_path}")
99
126
  sys.exit(1)
100
127
 
101
- # 验证日志目录
128
+ # 创建日志目录
102
129
  log_path = Path(args.log_dir)
103
130
  try:
104
131
  log_path.mkdir(parents=True, exist_ok=True)
@@ -109,11 +136,19 @@ def validate_arguments(args):
109
136
  return sources_path.resolve(), args.root_class, output_path.resolve()
110
137
 
111
138
 
112
- def main():
113
- """主函数"""
139
+ def main() -> None:
140
+ """
141
+ 主函数:协调整个重构过程
142
+
143
+ 处理流程:
144
+ 1. 解析和验证命令行参数
145
+ 2. 初始化日志系统
146
+ 3. 创建重构器并执行重构
147
+ 4. 输出结果统计信息
148
+ """
114
149
  args = None
115
150
  try:
116
- # 解析参数
151
+ # 解析和验证参数
117
152
  args = parse_arguments()
118
153
 
119
154
  # 初始化日志系统
@@ -130,62 +165,25 @@ def main():
130
165
  logger.info(f"📁 日志目录: {args.log_dir}")
131
166
  logger.info(f"🎯 根类: {root_class}")
132
167
 
133
- # 创建重构器并执行
168
+ # 创建重构器并执行重构
134
169
  reconstructor = ProtoReconstructor(sources_dir, output_dir)
135
170
  reconstructor._verbose = args.verbose # 传递verbose标志
136
171
  results = reconstructor.reconstruct_from_root(root_class)
137
172
 
138
- # 输出详细的结果统计
139
- if results:
140
- # 统计成功和失败的数量
141
- success_count = len(results)
142
- failed_count = len(reconstructor.failed_classes) if hasattr(reconstructor, 'failed_classes') else 0
143
- total_attempted = success_count + failed_count
144
-
145
- logger.success("✅ 重构完成!")
146
- logger.info(f"📊 处理统计: 共尝试处理 {total_attempted} 个类型")
147
-
148
- message_count = sum(1 for r in results.values() if hasattr(r, 'fields'))
149
- enum_count = sum(1 for r in results.values() if hasattr(r, 'values'))
150
-
151
- logger.info(f" - ✅ 成功: {success_count} 个 (消息: {message_count}, 枚举: {enum_count})")
152
-
153
- # 显示失败的类
154
- if hasattr(reconstructor, 'failed_classes') and reconstructor.failed_classes:
155
- logger.warning(f" - ❌ 失败: {failed_count} 个")
156
- for failed_class, reason in reconstructor.failed_classes.items():
157
- logger.warning(f" • {failed_class}: {reason}")
158
-
159
- # 显示跳过的类
160
- if hasattr(reconstructor, 'skipped_classes') and reconstructor.skipped_classes:
161
- skipped_count = len(reconstructor.skipped_classes)
162
- logger.info(f" - ⏭️ 跳过: {skipped_count} 个 (基础类型或已处理)")
163
- if args.verbose:
164
- for skipped_class, reason in reconstructor.skipped_classes.items():
165
- logger.info(f" • {skipped_class}: {reason}")
166
- else:
167
- logger.error("❌ 没有生成任何proto文件!")
168
- logger.error("请检查:")
169
- logger.error(" 1. 根类名是否正确")
170
- logger.error(" 2. Java源码目录是否包含对应的文件")
171
- logger.error(" 3. 类是否为protobuf消息类")
172
-
173
- # 显示详细的失败信息
174
- if hasattr(reconstructor, 'failed_classes') and reconstructor.failed_classes:
175
- logger.error("失败的类:")
176
- for failed_class, reason in reconstructor.failed_classes.items():
177
- logger.error(f" • {failed_class}: {reason}")
178
-
179
- sys.exit(1)
173
+ # 输出结果统计
174
+ print_results_summary(reconstructor, results, logger, args.verbose)
180
175
 
181
176
  except KeyboardInterrupt:
177
+ # 处理用户中断
182
178
  if args:
183
179
  logger = get_logger("main")
184
180
  logger.warning("⚠️ 操作被用户中断")
185
181
  else:
186
182
  print("\n⚠️ 操作被用户中断")
187
183
  sys.exit(1)
184
+
188
185
  except Exception as e:
186
+ # 处理其他异常
189
187
  if args:
190
188
  logger = get_logger("main")
191
189
  logger.error(f"❌ 重构失败: {e}")
@@ -194,7 +192,6 @@ def main():
194
192
  else:
195
193
  print(f"\n❌ 重构失败: {e}")
196
194
  if hasattr(args, 'verbose') and args.verbose:
197
- import traceback
198
195
  traceback.print_exc()
199
196
  sys.exit(1)
200
197
 
models/__init__.py CHANGED
@@ -1,27 +1,34 @@
1
- from dataclasses import dataclass, field
2
- from typing import List, Dict, Set
1
+ """
2
+ 模型定义模块
3
3
 
4
- @dataclass
5
- class FieldDefinition:
6
- """表示一个 Protobuf 消息中的字段。"""
7
- name: str
8
- type: str
9
- tag: int
10
- rule: str # "optional", "repeated", or "oneof"
4
+ 包含Protobuf消息、字段、枚举等数据结构的定义
5
+ 所有具体的类定义都在相应的子模块中
6
+ """
11
7
 
12
- @dataclass
13
- class OneofDefinition:
14
- """表示一个 Protobuf oneof 块。"""
15
- name: str
16
- fields: List[FieldDefinition] = field(default_factory=list)
8
+ # 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
9
+ try:
10
+ # 相对导入(包环境)
11
+ from .message_definition import (
12
+ MessageDefinition,
13
+ FieldDefinition,
14
+ OneofDefinition,
15
+ EnumDefinition,
16
+ EnumValueDefinition
17
+ )
18
+ except ImportError:
19
+ # 绝对导入(开发环境)
20
+ from models.message_definition import (
21
+ MessageDefinition,
22
+ FieldDefinition,
23
+ OneofDefinition,
24
+ EnumDefinition,
25
+ EnumValueDefinition
26
+ )
17
27
 
18
- @dataclass
19
- class MessageDefinition:
20
- """表示一个完整的 Protobuf 消息的定义。"""
21
- name: str
22
- package: str
23
- info_string: str = ""
24
- objects: List[str] = field(default_factory=list)
25
- fields: List[FieldDefinition] = field(default_factory=list)
26
- oneofs: Dict[str, OneofDefinition] = field(default_factory=dict)
27
- dependencies: Set[str] = field(default_factory=set)
28
+ __all__ = [
29
+ 'MessageDefinition',
30
+ 'FieldDefinition',
31
+ 'OneofDefinition',
32
+ 'EnumDefinition',
33
+ 'EnumValueDefinition'
34
+ ]
parsing/__init__.py CHANGED
@@ -1,3 +1,25 @@
1
+ """
2
+ 解析器模块
3
+
4
+ 包含各种源码解析器:
5
+ - Java解析器:解析Java源码,提取字段标签和类型信息
6
+ - 枚举解析器:专门处理Java枚举类的解析
7
+ """
8
+
9
+ # 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
10
+ try:
11
+ # 相对导入(包环境)
12
+ from .java_parser import JavaParser
13
+ from .enum_parser import EnumParser
14
+ except ImportError:
15
+ # 绝对导入(开发环境)
16
+ from parsing.java_parser import JavaParser
17
+ from parsing.enum_parser import EnumParser
18
+
19
+ __all__ = [
20
+ 'JavaParser',
21
+ 'EnumParser'
22
+ ]
1
23
 
2
24
 
3
25
 
parsing/enum_parser.py CHANGED
@@ -8,8 +8,16 @@
8
8
  import re
9
9
  import os
10
10
  from typing import List, Optional, Dict, Tuple
11
- from models.message_definition import EnumDefinition, EnumValueDefinition
12
- from utils.logger import get_logger
11
+
12
+ # 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
13
+ try:
14
+ # 相对导入(包环境)
15
+ from ..models.message_definition import EnumDefinition, EnumValueDefinition
16
+ from ..utils.logger import get_logger
17
+ except ImportError:
18
+ # 绝对导入(开发环境)
19
+ from models.message_definition import EnumDefinition, EnumValueDefinition
20
+ from utils.logger import get_logger
13
21
 
14
22
 
15
23
  class EnumParser:
parsing/java_parser.py CHANGED
@@ -11,7 +11,13 @@ import re
11
11
  from pathlib import Path
12
12
  from typing import Optional, Tuple, List
13
13
 
14
- from utils.logger import get_logger
14
+ # 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
15
+ try:
16
+ # 相对导入(包环境)
17
+ from ..utils.logger import get_logger
18
+ except ImportError:
19
+ # 绝对导入(开发环境)
20
+ from utils.logger import get_logger
15
21
 
16
22
 
17
23
  class JavaParser:
@@ -27,12 +33,13 @@ class JavaParser:
27
33
  self.logger = get_logger("java_parser")
28
34
 
29
35
  # 匹配newMessageInfo调用的正则表达式
30
- # 格式:GeneratedMessageLite.newMessageInfo(DEFAULT_INSTANCE, "字节码", new Object[]{对象数组})
36
+ # 格式1:GeneratedMessageLite.newMessageInfo(DEFAULT_INSTANCE, "字节码", new Object[]{对象数组})
37
+ # 格式2:GeneratedMessageLite.newMessageInfo(DEFAULT_INSTANCE, "字节码", null)
31
38
  self.new_message_info_pattern = re.compile(
32
39
  r'GeneratedMessageLite\.newMessageInfo\(\s*'
33
40
  r'DEFAULT_INSTANCE\s*,\s*'
34
41
  r'"([^"]*)",\s*' # 捕获字节码字符串
35
- r'new\s+Object\[\]\s*\{([^}]*)\}', # 捕获对象数组
42
+ r'(?:new\s+Object\[\]\s*\{([^}]*)\}|null)', # 捕获对象数组或null
36
43
  re.DOTALL
37
44
  )
38
45
 
@@ -50,17 +57,26 @@ class JavaParser:
50
57
  # 读取Java文件内容
51
58
  content = java_file_path.read_text(encoding='utf-8')
52
59
 
53
- # 查找newMessageInfo调用
54
- match = self.new_message_info_pattern.search(content)
55
- if not match:
60
+ # 查找所有newMessageInfo调用
61
+ matches = self.new_message_info_pattern.findall(content)
62
+ if not matches:
56
63
  return None, None
57
64
 
58
- # 提取字节码字符串和对象数组字符串
59
- info_string = match.group(1)
60
- objects_str = match.group(2)
65
+ # 获取主类的字段标签
66
+ main_class_field_tags = self._extract_field_number_constants(content)
61
67
 
62
- # 解析对象数组
63
- objects_array = self._parse_objects_array(objects_str)
68
+ # 根据字段匹配选择正确的newMessageInfo调用
69
+ best_match = self._select_main_class_message_info(matches, main_class_field_tags)
70
+ if not best_match:
71
+ return None, None
72
+
73
+ info_string, objects_str = best_match
74
+
75
+ # 解析对象数组(允许null/空对象数组)
76
+ if objects_str and objects_str.strip():
77
+ objects_array = self._parse_objects_array(objects_str)
78
+ else:
79
+ objects_array = [] # 空消息的情况(null或空字符串)
64
80
 
65
81
  return info_string, objects_array
66
82
 
@@ -68,6 +84,142 @@ class JavaParser:
68
84
  self.logger.error(f"❌ 解析Java文件失败 {java_file_path}: {e}")
69
85
  return None, None
70
86
 
87
+ def parse_inner_class_from_file(self, java_file_path: Path, inner_class_name: str) -> Tuple[Optional[str], Optional[List[str]]]:
88
+ """
89
+ 从外部类文件中解析指定的内部类的protobuf信息
90
+
91
+ Args:
92
+ java_file_path: 外部类Java文件路径
93
+ inner_class_name: 内部类名(如"SkipRecovery")
94
+
95
+ Returns:
96
+ Tuple[字节码字符串, 对象数组] 或 (None, None) 如果解析失败
97
+ """
98
+ try:
99
+ # 读取Java文件内容
100
+ content = java_file_path.read_text(encoding='utf-8')
101
+
102
+ # 提取指定内部类的内容
103
+ inner_class_content = self._extract_inner_class_content(content, inner_class_name)
104
+ if not inner_class_content:
105
+ self.logger.error(f"❌ 在文件 {java_file_path} 中找不到内部类: {inner_class_name}")
106
+ return None, None
107
+
108
+ # 在内部类内容中查找newMessageInfo调用
109
+ matches = self.new_message_info_pattern.findall(inner_class_content)
110
+ if not matches:
111
+ self.logger.debug(f" 🔍 内部类 {inner_class_name} 中没有找到newMessageInfo调用")
112
+ return None, None
113
+
114
+ # 对于内部类,通常只有一个newMessageInfo调用
115
+ info_string, objects_str = matches[0]
116
+
117
+ # 解析对象数组(允许null/空对象数组)
118
+ if objects_str and objects_str.strip():
119
+ objects_array = self._parse_objects_array(objects_str)
120
+ else:
121
+ objects_array = [] # 空消息的情况(null或空字符串)
122
+
123
+ # 为内部类单独提取字段标签
124
+ self._extract_inner_class_field_tags(java_file_path, inner_class_name, inner_class_content)
125
+
126
+ self.logger.info(f" ✅ 成功解析内部类 {inner_class_name}: {len(objects_array)} 个对象")
127
+ return info_string, objects_array
128
+
129
+ except Exception as e:
130
+ self.logger.error(f"❌ 解析内部类失败 {inner_class_name} from {java_file_path}: {e}")
131
+ return None, None
132
+
133
+ def _extract_inner_class_content(self, content: str, inner_class_name: str) -> Optional[str]:
134
+ """
135
+ 从Java文件内容中提取指定内部类的内容
136
+
137
+ Args:
138
+ content: Java文件内容
139
+ inner_class_name: 内部类名
140
+
141
+ Returns:
142
+ 内部类的内容,如果找不到则返回None
143
+ """
144
+ # 查找内部类定义的开始
145
+ # 匹配模式:public static final class InnerClassName extends ...
146
+ class_pattern = rf'public\s+static\s+final\s+class\s+{re.escape(inner_class_name)}\s+extends\s+'
147
+ match = re.search(class_pattern, content)
148
+
149
+ if not match:
150
+ # 尝试更宽松的匹配
151
+ class_pattern = rf'class\s+{re.escape(inner_class_name)}\s+extends\s+'
152
+ match = re.search(class_pattern, content)
153
+
154
+ if not match:
155
+ return None
156
+
157
+ # 找到类定义的开始位置
158
+ class_start = match.start()
159
+
160
+ # 从类定义开始位置往前找到第一个'{'
161
+ content_from_class = content[class_start:]
162
+ brace_start = content_from_class.find('{')
163
+ if brace_start == -1:
164
+ return None
165
+
166
+ # 从第一个'{'开始,找到匹配的'}'
167
+ start_pos = class_start + brace_start + 1
168
+ brace_count = 1
169
+ pos = start_pos
170
+
171
+ while pos < len(content) and brace_count > 0:
172
+ if content[pos] == '{':
173
+ brace_count += 1
174
+ elif content[pos] == '}':
175
+ brace_count -= 1
176
+ pos += 1
177
+
178
+ if brace_count == 0:
179
+ # 找到了匹配的结束位置
180
+ inner_class_content = content[start_pos:pos-1]
181
+ return inner_class_content
182
+
183
+ return None
184
+
185
+ def _extract_inner_class_field_tags(self, java_file_path: Path, inner_class_name: str, inner_class_content: str) -> None:
186
+ """
187
+ 为内部类提取字段标签,并缓存到文件系统中
188
+
189
+ Args:
190
+ java_file_path: Java文件路径
191
+ inner_class_name: 内部类名
192
+ inner_class_content: 内部类的源码内容
193
+ """
194
+ # 从内部类内容中提取字段标签
195
+ field_tags = self._extract_field_tags_from_source(inner_class_content)
196
+
197
+ if field_tags:
198
+ # 创建内部类的虚拟文件路径,用于缓存字段标签
199
+ # 如:Service$CompleteOnboardingRequest.java -> Service$CompleteOnboardingRequest$InstallationInfo.java
200
+ virtual_file_path = java_file_path.parent / f"{java_file_path.stem}${inner_class_name}.java"
201
+
202
+ # 将字段标签缓存到虚拟文件路径
203
+ self._cache_field_tags(virtual_file_path, field_tags)
204
+
205
+ self.logger.debug(f" 🏷️ 为内部类 {inner_class_name} 提取了 {len(field_tags)} 个字段标签")
206
+ else:
207
+ self.logger.debug(f" 🔍 内部类 {inner_class_name} 没有字段标签")
208
+
209
+ def _cache_field_tags(self, file_path: Path, field_tags: dict) -> None:
210
+ """
211
+ 缓存字段标签到内存中,供后续使用
212
+
213
+ Args:
214
+ file_path: 文件路径(可能是虚拟路径)
215
+ field_tags: 字段标签字典
216
+ """
217
+ # 使用简单的内存缓存
218
+ if not hasattr(self, '_field_tags_cache'):
219
+ self._field_tags_cache = {}
220
+
221
+ self._field_tags_cache[str(file_path)] = field_tags
222
+
71
223
  def _parse_objects_array(self, objects_str: str) -> List[str]:
72
224
  """
73
225
  解析Java对象数组字符串
@@ -183,6 +335,85 @@ class JavaParser:
183
335
 
184
336
  return part if part else None
185
337
 
338
+ def _select_main_class_message_info(self, matches: List[tuple], main_class_field_tags: dict) -> Optional[tuple]:
339
+ """
340
+ 根据字段匹配选择主类的newMessageInfo调用
341
+
342
+ Args:
343
+ matches: 所有newMessageInfo匹配结果 [(info_string, objects_str), ...]
344
+ main_class_field_tags: 主类字段标签 {const_name: tag_value}
345
+
346
+ Returns:
347
+ 主类的newMessageInfo匹配结果或None
348
+ """
349
+ if not matches:
350
+ return None
351
+
352
+ if len(matches) == 1:
353
+ return matches[0]
354
+
355
+ # 从主类字段标签生成期望的字段名列表
356
+ expected_fields = set()
357
+ for const_name in main_class_field_tags.keys():
358
+ field_name = self._const_name_to_field_name(const_name)
359
+ expected_fields.add(field_name)
360
+
361
+ self.logger.debug(f" 🔍 主类期望字段: {expected_fields}")
362
+
363
+ best_match = None
364
+ best_score = 0
365
+
366
+ for info_string, objects_str in matches:
367
+ # 解析对象数组(允许null/空对象数组)
368
+ if objects_str and objects_str.strip():
369
+ objects_array = self._parse_objects_array(objects_str)
370
+ else:
371
+ objects_array = [] # 空消息的情况(null或空字符串)
372
+
373
+ # 计算匹配分数
374
+ score = self._calculate_field_match_score(objects_array, expected_fields)
375
+
376
+ self.logger.debug(f" 📊 对象数组 {objects_array[:3]}... 匹配分数: {score}")
377
+
378
+ if score > best_score:
379
+ best_score = score
380
+ best_match = (info_string, objects_str)
381
+
382
+ if best_match:
383
+ self.logger.info(f" ✅ 选择主类newMessageInfo,匹配分数: {best_score}")
384
+ else:
385
+ self.logger.warning(f" ⚠️ 无法找到匹配的主类newMessageInfo")
386
+
387
+ return best_match
388
+
389
+ def _calculate_field_match_score(self, objects_array: List[str], expected_fields: set) -> int:
390
+ """
391
+ 计算对象数组与期望字段的匹配分数
392
+
393
+ Args:
394
+ objects_array: 解析后的对象数组
395
+ expected_fields: 期望的字段名集合
396
+
397
+ Returns:
398
+ 匹配分数(匹配的字段数量)
399
+ """
400
+ if not objects_array or not expected_fields:
401
+ return 0
402
+
403
+ match_count = 0
404
+
405
+ for obj in objects_array:
406
+ # 检查是否是字段名(以_结尾的字符串)
407
+ if obj.endswith('_'):
408
+ if obj in expected_fields:
409
+ match_count += 1
410
+ # 检查是否是类引用(不以_结尾,可能是oneof字段的类型)
411
+ elif not obj.endswith('_'):
412
+ # 类引用也算作有效匹配,但权重较低
413
+ match_count += 0.5
414
+
415
+ return int(match_count)
416
+
186
417
  def parse_enum_file(self, java_file_path: Path) -> Optional[List[tuple]]:
187
418
  """
188
419
  解析Java枚举文件,提取枚举值和数值
@@ -369,6 +600,18 @@ class JavaParser:
369
600
  字段标签映射 {field_name: tag} 或 None 如果解析失败
370
601
  """
371
602
  try:
603
+ # 首先检查是否有缓存的字段标签(用于内部类)
604
+ if hasattr(self, '_field_tags_cache'):
605
+ cache_key = str(java_file_path)
606
+ if cache_key in self._field_tags_cache:
607
+ self.logger.debug(f" 🎯 使用缓存的字段标签: {java_file_path}")
608
+ return self._field_tags_cache[cache_key]
609
+
610
+ # 检查文件是否存在(虚拟文件路径不存在)
611
+ if not java_file_path.exists():
612
+ self.logger.debug(f" 📁 文件不存在,跳过字段标签提取: {java_file_path}")
613
+ return None
614
+
372
615
  # 读取Java文件内容
373
616
  content = java_file_path.read_text(encoding='utf-8')
374
617
 
@@ -451,7 +694,7 @@ class JavaParser:
451
694
 
452
695
  def _extract_field_number_constants(self, content: str) -> dict:
453
696
  """
454
- 提取所有FIELD_NUMBER常量
697
+ 提取主类的FIELD_NUMBER常量(排除内部类)
455
698
 
456
699
  Args:
457
700
  content: Java文件内容
@@ -459,19 +702,65 @@ class JavaParser:
459
702
  Returns:
460
703
  常量名到值的映射 {const_name: value}
461
704
  """
705
+ # 首先找到主类的定义范围
706
+ main_class_content = self._extract_main_class_content(content)
707
+
462
708
  field_tag_pattern = re.compile(
463
709
  r'\s*public\s+static\s+final\s+int\s+' # 允许行首有空白字符
464
710
  r'([A-Z0-9_]+)_FIELD_NUMBER\s*=\s*(\d+)\s*;' # 允许常量名包含数字
465
711
  )
466
712
 
467
713
  constants = {}
468
- for match in field_tag_pattern.finditer(content):
714
+ for match in field_tag_pattern.finditer(main_class_content):
469
715
  const_name = match.group(1)
470
716
  tag_value = int(match.group(2))
471
717
  constants[const_name] = tag_value
472
718
 
473
719
  return constants
474
720
 
721
+ def _extract_main_class_content(self, content: str) -> str:
722
+ """
723
+ 提取主类的内容,排除内部类定义
724
+
725
+ Args:
726
+ content: Java文件内容
727
+
728
+ Returns:
729
+ 主类内容(不包括内部类)
730
+ """
731
+ # 找到主类的开始位置
732
+ main_class_pattern = re.compile(
733
+ r'public\s+final\s+class\s+\w+(?:\$\w+)?\s+extends\s+GeneratedMessageLite.*?\{',
734
+ re.DOTALL
735
+ )
736
+
737
+ main_class_match = main_class_pattern.search(content)
738
+ if not main_class_match:
739
+ # 如果找不到主类定义,返回整个内容作为回退
740
+ return content
741
+
742
+ main_class_start = main_class_match.end()
743
+
744
+ # 找到第一个内部类的开始位置
745
+ inner_class_pattern = re.compile(
746
+ r'\n\s*public\s+(?:static\s+)?(?:final\s+)?class\s+\w+\s+extends\s+',
747
+ re.MULTILINE
748
+ )
749
+
750
+ # 从主类开始位置搜索内部类
751
+ content_from_main_class = content[main_class_start:]
752
+ inner_class_match = inner_class_pattern.search(content_from_main_class)
753
+
754
+ if inner_class_match:
755
+ # 如果找到内部类,只返回主类部分
756
+ inner_class_start = main_class_start + inner_class_match.start()
757
+ main_class_content = content[:inner_class_start]
758
+ else:
759
+ # 如果没有内部类,返回整个内容
760
+ main_class_content = content
761
+
762
+ return main_class_content
763
+
475
764
  def _generate_possible_constant_names(self, field_name: str) -> List[str]:
476
765
  """
477
766
  根据字段名生成可能的常量名
pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "reproto"
7
- version = "0.1.1"
7
+ version = "0.1.3"
8
8
  description = "一个强大的逆向工程工具, 能够从任何使用Google Protobuf Lite的Android应用中自动重构出完整的.proto文件结构."
9
9
  readme = "README.md"
10
10
  license = "Proprietary"