reproto 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .git/COMMIT_EDITMSG +29 -1
- .git/index +0 -0
- .git/logs/HEAD +4 -0
- .git/logs/refs/heads/iyue +4 -0
- .git/logs/refs/remotes/gitlab/iyue +4 -0
- .git/logs/refs/remotes/origin/iyue +4 -0
- .git/objects/00/81a7e1ec3251cc192ed2b73d5be897593fb872 +0 -0
- .git/objects/00/885cebf557ff261574deb93cb96449d06db01c +0 -0
- .git/objects/02/33f5b60263e5dc6d041c0b223a0724eb650faa +0 -0
- .git/objects/03/3fab23ccc74b707a31f6f37dc5df42c100aac0 +0 -0
- .git/objects/07/557fbd1d149ce51af8e98e8ca8590757a89dfa +0 -0
- .git/objects/07/b71df33935c14ddf99e3480c04eca4685028bc +0 -0
- .git/objects/0b/e20ea749c6ae1075ae98f9426e00b0a4a235af +0 -0
- .git/objects/12/f9cc4ad0b4f0af4f7bae379f281b2cebe7cc7f +0 -0
- .git/objects/13/5f50fea51c72f977f48d06a7584aba0f61e260 +0 -0
- .git/objects/1b/97584ebada3e93d7a2cfa4a1aae0b79c414d20 +0 -0
- .git/objects/1b/f0c1ad9586578e8332d061f7648dcb041ec063 +0 -0
- .git/objects/1e/46816d16e7c9db7575f1964403c3daa105be5c +0 -0
- .git/objects/24/d78e796570a8572a03bc1dd26608a7cfb506f8 +2 -0
- .git/objects/27/24208ca2442e8ab9b453d0eb230fa41243b270 +0 -0
- .git/objects/2a/fe93d8bcbeab9e136d8b6766604c32b3610314 +0 -0
- .git/objects/2d/2b812ca27c477f9e1b2c2706a5eb795ffcf8eb +0 -0
- .git/objects/2d/8a0da260a710010ae62be134ac1cea6ceecfd1 +0 -0
- .git/objects/2d/e8ecbb5ab5de1a032bef3f4606ce5fa7c6c4e8 +0 -0
- .git/objects/30/9347e5681d80bd3c7949882e27090dd9070d16 +0 -0
- .git/objects/33/633c9df669ff8cf38638717937a54990814268 +0 -0
- .git/objects/39/993e3600bf4ab82aa361b738ee97a108787450 +0 -0
- .git/objects/3b/ab663710fd6b43d9372313fced9043c4cb07dd +0 -0
- .git/objects/3e/b3f1273caf6814dfa69325ccbd9fd1340cf20a +0 -0
- .git/objects/3f/b8830f516342a0ae1cb7c34b65016827cb9570 +4 -0
- .git/objects/41/f40c22247de377be99e30784229f3f128508a2 +0 -0
- .git/objects/45/fbc774dedb61c7c205ea732f59a8dca8d13555 +3 -0
- .git/objects/48/cb11e75518a53be14146018214110986fade67 +0 -0
- .git/objects/4f/c6dc41f9c0a1a8e0eedd3ba49c43d78d0dbaba +0 -0
- .git/objects/59/4c23f158ccbd0a4288f9ea046d06160195afbf +0 -0
- .git/objects/5b/d4d87753b79e9157817f0c2e6964a731052854 +0 -0
- .git/objects/60/5cb6fd6a9f8894ad4d43a9b8e4785c1b3b0e17 +1 -0
- .git/objects/60/f61a0ea50091eac8d344c86597375cbdfc2785 +0 -0
- .git/objects/63/ddda2a403efaab3f4c6597b3a73a7b1147adb5 +0 -0
- .git/objects/65/0189fe083bd711e45d463b229a72be619abad2 +0 -0
- .git/objects/66/663db35bfec8ef5f1a5b1c840fde1bb62a0eb8 +0 -0
- .git/objects/66/6c5c5fc30435228116fa08c9d821bebaaa8926 +0 -0
- .git/objects/7c/ef0adfb28fd774bc78061c6f088e1ef9b050f6 +0 -0
- .git/objects/7d/dc129188a10c68ab756ef2cacb292c76920403 +0 -0
- .git/objects/80/17038e0f7818a44a742f77c86f4f88ed768fcd +0 -0
- .git/objects/87/c7db6c91c17a2df84b56d30bd24a0f6b9dbdd9 +0 -0
- .git/objects/8a/0ed0ed8886fbc823e8d2258fa6d18699e94e25 +0 -0
- .git/objects/8d/d857b3d0ab3f5cd2e9173d532ef86e30df8eda +0 -0
- .git/objects/92/984cf67b2c25d435468a8218daa26ba0466054 +0 -0
- .git/objects/93/140b54b1fb9116ee214afee8abf2c72a232487 +0 -0
- .git/objects/93/68f2c32f83054ab072b7c9686d8baa0bad7f12 +4 -0
- .git/objects/9a/5ad062be9f6e001f4237a598a08981aba731e6 +0 -0
- .git/objects/9e/23448ac58f907d9d123c32bdccedbb3d6741b5 +0 -0
- .git/objects/a0/d192999af7e2cbfa6a9ccd04d720a04e5a06d5 +0 -0
- .git/objects/a1/655e0cb323c300562f97dcc67d5a446908c8ec +0 -0
- .git/objects/a5/38cc82cef7c49500d3522220f0f60a9ebc1ae6 +0 -0
- .git/objects/a9/41063a7ce89c353fa24378ec7c3f12f08f9df8 +0 -0
- .git/objects/a9/cc7923c34a4c97c5711d6309672f41d46c612a +0 -0
- .git/objects/ac/5c983d949d8c928bb022badf801e45e75e785e +0 -0
- .git/objects/af/c9cc15629847447063e86a82b8b56abb4fc08f +0 -0
- .git/objects/b0/82ca2c1b5a03edff25da3c2b2b573d049877e9 +0 -0
- .git/objects/b1/db1c131cf32916028342c0037ce8eb57a8eb26 +0 -0
- .git/objects/b2/8334b94392b8af397a05ed702690fa6c9ab1ca +0 -0
- .git/objects/b8/7c89dcfce9e244ff5ef6a4bd394de12e8c8092 +0 -0
- .git/objects/bc/e98bdb71c8681acb460195fdcbbe5d36290976 +0 -0
- .git/objects/c1/87d5e047eca86cfd8d444be2987aaa3f62c4d6 +0 -0
- .git/objects/c4/c2da96b0bb8db2acb0e6615cf340c7e51af26b +0 -0
- .git/objects/c5/13a96e7584636b20b12280c029750d5bc3da1e +0 -0
- .git/objects/c7/c34283697bd3cce07db53953eda25ee7cc371e +0 -0
- .git/objects/c9/d60d922a04b87587cd67b0abf9fe5a7b7b76cd +0 -0
- .git/objects/d2/69b1676dbf32f76a7c405d0b4ea6a70ac3a626 +0 -0
- .git/objects/d3/5a918b1d9125ad35d60e08b181323df3246f1a +0 -0
- .git/objects/d8/eaf86669fbfd10497570c1784db1ed2696b588 +0 -0
- .git/objects/d9/3bd435c8c7ad4efb83dff04d5450fabb9e3faf +0 -0
- .git/objects/d9/90e6d553577d37ebce8b28b3015ecbde038b42 +0 -0
- .git/objects/da/13cc15bcd8ee39c81f36dee7f179a569ecab0b +0 -0
- .git/objects/e3/27755808d88c7ae5c06c229cf18bd0519646df +0 -0
- .git/objects/e4/4c1d8a90207ac082d8ab7ff0db66708e2ebc31 +0 -0
- .git/objects/e5/83e7c40be934d16a1fa2e973487b395d930f42 +0 -0
- .git/objects/ed/1ae867d5e63195845afc58d88c38ecbdea97df +0 -0
- .git/objects/ef/f44e5099da27f7fb1ef14bb34902ccf4250b89 +0 -0
- .git/objects/f5/1be495b96272fa2e47f30071aed35ac1f0dd2c +0 -0
- .git/objects/f8/ed595d25bd9d500e765a792c513878f7ddb1f7 +0 -0
- .git/objects/fd/0bc07dc3c95e6168ab6d367d9eca139ac1e539 +0 -0
- .git/refs/heads/iyue +1 -1
- .git/refs/remotes/gitlab/iyue +1 -1
- .git/refs/remotes/origin/iyue +1 -1
- .gitignore +2 -1
- README.md +104 -190
- core/__init__.py +23 -0
- core/info_decoder.py +520 -10
- core/reconstructor.py +159 -21
- generation/__init__.py +17 -0
- generation/proto_generator.py +62 -16
- include/google/protobuf/any.proto +162 -0
- include/google/protobuf/api.proto +207 -0
- include/google/protobuf/compiler/plugin.proto +180 -0
- include/google/protobuf/cpp_features.proto +67 -0
- include/google/protobuf/descriptor.proto +1417 -0
- include/google/protobuf/duration.proto +115 -0
- include/google/protobuf/empty.proto +51 -0
- include/google/protobuf/field_mask.proto +245 -0
- include/google/protobuf/go_features.proto +80 -0
- include/google/protobuf/java_features.proto +130 -0
- include/google/protobuf/source_context.proto +48 -0
- include/google/protobuf/struct.proto +95 -0
- include/google/protobuf/timestamp.proto +144 -0
- include/google/protobuf/type.proto +193 -0
- include/google/protobuf/wrappers.proto +157 -0
- main.py +53 -56
- models/__init__.py +31 -24
- parsing/__init__.py +22 -0
- parsing/enum_parser.py +10 -2
- parsing/java_parser.py +302 -13
- pyproject.toml +1 -1
- reproto-0.1.3.dist-info/METADATA +209 -0
- {reproto-0.1.1.dist-info → reproto-0.1.3.dist-info}/RECORD +125 -31
- utils/__init__.py +40 -0
- utils/builtin_proto.py +269 -0
- utils/file_cache.py +8 -1
- utils/report_utils.py +71 -0
- utils/type_index.py +8 -1
- utils/type_utils.py +39 -6
- core/bytecode_parser.py +0 -274
- reproto-0.1.1.dist-info/METADATA +0 -295
- {reproto-0.1.1.dist-info → reproto-0.1.3.dist-info}/WHEEL +0 -0
- {reproto-0.1.1.dist-info → reproto-0.1.3.dist-info}/entry_points.txt +0 -0
main.py
CHANGED
@@ -16,17 +16,33 @@ Author: AI Assistant
|
|
16
16
|
|
17
17
|
import sys
|
18
18
|
import argparse
|
19
|
+
import traceback
|
19
20
|
from pathlib import Path
|
20
21
|
|
21
|
-
#
|
22
|
+
# 确保项目根目录在Python路径中
|
22
23
|
import os
|
23
24
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
24
|
-
from core.reconstructor import ProtoReconstructor
|
25
|
-
from utils.logger import setup_logger, get_logger
|
26
25
|
|
26
|
+
# 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
|
27
|
+
try:
|
28
|
+
# 相对导入(包环境)
|
29
|
+
from .core.reconstructor import ProtoReconstructor
|
30
|
+
from .utils.logger import setup_logger, get_logger
|
31
|
+
from .utils.report_utils import print_results_summary
|
32
|
+
except ImportError:
|
33
|
+
# 绝对导入(开发环境)
|
34
|
+
from core.reconstructor import ProtoReconstructor
|
35
|
+
from utils.logger import setup_logger, get_logger
|
36
|
+
from utils.report_utils import print_results_summary
|
27
37
|
|
28
|
-
|
29
|
-
|
38
|
+
|
39
|
+
def parse_arguments() -> argparse.Namespace:
|
40
|
+
"""
|
41
|
+
解析命令行参数
|
42
|
+
|
43
|
+
Returns:
|
44
|
+
解析后的命令行参数对象
|
45
|
+
"""
|
30
46
|
parser = argparse.ArgumentParser(
|
31
47
|
description='从JADX反编译的Java源码重构Protobuf .proto文件',
|
32
48
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
@@ -72,8 +88,19 @@ def parse_arguments():
|
|
72
88
|
return parser.parse_args()
|
73
89
|
|
74
90
|
|
75
|
-
def validate_arguments(args):
|
76
|
-
"""
|
91
|
+
def validate_arguments(args: argparse.Namespace) -> tuple[Path, str, Path]:
|
92
|
+
"""
|
93
|
+
验证命令行参数的有效性
|
94
|
+
|
95
|
+
Args:
|
96
|
+
args: 解析后的命令行参数
|
97
|
+
|
98
|
+
Returns:
|
99
|
+
验证后的路径元组: (sources_path, root_class, output_path)
|
100
|
+
|
101
|
+
Raises:
|
102
|
+
SystemExit: 当参数无效时退出程序
|
103
|
+
"""
|
77
104
|
logger = get_logger("main")
|
78
105
|
|
79
106
|
# 验证源码目录
|
@@ -92,13 +119,13 @@ def validate_arguments(args):
|
|
92
119
|
logger.error("应该是完整的类名,如: com.example.Model")
|
93
120
|
sys.exit(1)
|
94
121
|
|
95
|
-
#
|
122
|
+
# 验证输出目录
|
96
123
|
output_path = Path(args.output_dir)
|
97
124
|
if output_path.exists() and not output_path.is_dir():
|
98
125
|
logger.error(f"输出路径存在但不是目录: {output_path}")
|
99
126
|
sys.exit(1)
|
100
127
|
|
101
|
-
#
|
128
|
+
# 创建日志目录
|
102
129
|
log_path = Path(args.log_dir)
|
103
130
|
try:
|
104
131
|
log_path.mkdir(parents=True, exist_ok=True)
|
@@ -109,11 +136,19 @@ def validate_arguments(args):
|
|
109
136
|
return sources_path.resolve(), args.root_class, output_path.resolve()
|
110
137
|
|
111
138
|
|
112
|
-
def main():
|
113
|
-
"""
|
139
|
+
def main() -> None:
|
140
|
+
"""
|
141
|
+
主函数:协调整个重构过程
|
142
|
+
|
143
|
+
处理流程:
|
144
|
+
1. 解析和验证命令行参数
|
145
|
+
2. 初始化日志系统
|
146
|
+
3. 创建重构器并执行重构
|
147
|
+
4. 输出结果统计信息
|
148
|
+
"""
|
114
149
|
args = None
|
115
150
|
try:
|
116
|
-
#
|
151
|
+
# 解析和验证参数
|
117
152
|
args = parse_arguments()
|
118
153
|
|
119
154
|
# 初始化日志系统
|
@@ -130,62 +165,25 @@ def main():
|
|
130
165
|
logger.info(f"📁 日志目录: {args.log_dir}")
|
131
166
|
logger.info(f"🎯 根类: {root_class}")
|
132
167
|
|
133
|
-
#
|
168
|
+
# 创建重构器并执行重构
|
134
169
|
reconstructor = ProtoReconstructor(sources_dir, output_dir)
|
135
170
|
reconstructor._verbose = args.verbose # 传递verbose标志
|
136
171
|
results = reconstructor.reconstruct_from_root(root_class)
|
137
172
|
|
138
|
-
#
|
139
|
-
|
140
|
-
# 统计成功和失败的数量
|
141
|
-
success_count = len(results)
|
142
|
-
failed_count = len(reconstructor.failed_classes) if hasattr(reconstructor, 'failed_classes') else 0
|
143
|
-
total_attempted = success_count + failed_count
|
144
|
-
|
145
|
-
logger.success("✅ 重构完成!")
|
146
|
-
logger.info(f"📊 处理统计: 共尝试处理 {total_attempted} 个类型")
|
147
|
-
|
148
|
-
message_count = sum(1 for r in results.values() if hasattr(r, 'fields'))
|
149
|
-
enum_count = sum(1 for r in results.values() if hasattr(r, 'values'))
|
150
|
-
|
151
|
-
logger.info(f" - ✅ 成功: {success_count} 个 (消息: {message_count}, 枚举: {enum_count})")
|
152
|
-
|
153
|
-
# 显示失败的类
|
154
|
-
if hasattr(reconstructor, 'failed_classes') and reconstructor.failed_classes:
|
155
|
-
logger.warning(f" - ❌ 失败: {failed_count} 个")
|
156
|
-
for failed_class, reason in reconstructor.failed_classes.items():
|
157
|
-
logger.warning(f" • {failed_class}: {reason}")
|
158
|
-
|
159
|
-
# 显示跳过的类
|
160
|
-
if hasattr(reconstructor, 'skipped_classes') and reconstructor.skipped_classes:
|
161
|
-
skipped_count = len(reconstructor.skipped_classes)
|
162
|
-
logger.info(f" - ⏭️ 跳过: {skipped_count} 个 (基础类型或已处理)")
|
163
|
-
if args.verbose:
|
164
|
-
for skipped_class, reason in reconstructor.skipped_classes.items():
|
165
|
-
logger.info(f" • {skipped_class}: {reason}")
|
166
|
-
else:
|
167
|
-
logger.error("❌ 没有生成任何proto文件!")
|
168
|
-
logger.error("请检查:")
|
169
|
-
logger.error(" 1. 根类名是否正确")
|
170
|
-
logger.error(" 2. Java源码目录是否包含对应的文件")
|
171
|
-
logger.error(" 3. 类是否为protobuf消息类")
|
172
|
-
|
173
|
-
# 显示详细的失败信息
|
174
|
-
if hasattr(reconstructor, 'failed_classes') and reconstructor.failed_classes:
|
175
|
-
logger.error("失败的类:")
|
176
|
-
for failed_class, reason in reconstructor.failed_classes.items():
|
177
|
-
logger.error(f" • {failed_class}: {reason}")
|
178
|
-
|
179
|
-
sys.exit(1)
|
173
|
+
# 输出结果统计
|
174
|
+
print_results_summary(reconstructor, results, logger, args.verbose)
|
180
175
|
|
181
176
|
except KeyboardInterrupt:
|
177
|
+
# 处理用户中断
|
182
178
|
if args:
|
183
179
|
logger = get_logger("main")
|
184
180
|
logger.warning("⚠️ 操作被用户中断")
|
185
181
|
else:
|
186
182
|
print("\n⚠️ 操作被用户中断")
|
187
183
|
sys.exit(1)
|
184
|
+
|
188
185
|
except Exception as e:
|
186
|
+
# 处理其他异常
|
189
187
|
if args:
|
190
188
|
logger = get_logger("main")
|
191
189
|
logger.error(f"❌ 重构失败: {e}")
|
@@ -194,7 +192,6 @@ def main():
|
|
194
192
|
else:
|
195
193
|
print(f"\n❌ 重构失败: {e}")
|
196
194
|
if hasattr(args, 'verbose') and args.verbose:
|
197
|
-
import traceback
|
198
195
|
traceback.print_exc()
|
199
196
|
sys.exit(1)
|
200
197
|
|
models/__init__.py
CHANGED
@@ -1,27 +1,34 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
"""
|
2
|
+
模型定义模块
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
name: str
|
8
|
-
type: str
|
9
|
-
tag: int
|
10
|
-
rule: str # "optional", "repeated", or "oneof"
|
4
|
+
包含Protobuf消息、字段、枚举等数据结构的定义
|
5
|
+
所有具体的类定义都在相应的子模块中
|
6
|
+
"""
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
8
|
+
# 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
|
9
|
+
try:
|
10
|
+
# 相对导入(包环境)
|
11
|
+
from .message_definition import (
|
12
|
+
MessageDefinition,
|
13
|
+
FieldDefinition,
|
14
|
+
OneofDefinition,
|
15
|
+
EnumDefinition,
|
16
|
+
EnumValueDefinition
|
17
|
+
)
|
18
|
+
except ImportError:
|
19
|
+
# 绝对导入(开发环境)
|
20
|
+
from models.message_definition import (
|
21
|
+
MessageDefinition,
|
22
|
+
FieldDefinition,
|
23
|
+
OneofDefinition,
|
24
|
+
EnumDefinition,
|
25
|
+
EnumValueDefinition
|
26
|
+
)
|
17
27
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
fields: List[FieldDefinition] = field(default_factory=list)
|
26
|
-
oneofs: Dict[str, OneofDefinition] = field(default_factory=dict)
|
27
|
-
dependencies: Set[str] = field(default_factory=set)
|
28
|
+
__all__ = [
|
29
|
+
'MessageDefinition',
|
30
|
+
'FieldDefinition',
|
31
|
+
'OneofDefinition',
|
32
|
+
'EnumDefinition',
|
33
|
+
'EnumValueDefinition'
|
34
|
+
]
|
parsing/__init__.py
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
"""
|
2
|
+
解析器模块
|
3
|
+
|
4
|
+
包含各种源码解析器:
|
5
|
+
- Java解析器:解析Java源码,提取字段标签和类型信息
|
6
|
+
- 枚举解析器:专门处理Java枚举类的解析
|
7
|
+
"""
|
8
|
+
|
9
|
+
# 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
|
10
|
+
try:
|
11
|
+
# 相对导入(包环境)
|
12
|
+
from .java_parser import JavaParser
|
13
|
+
from .enum_parser import EnumParser
|
14
|
+
except ImportError:
|
15
|
+
# 绝对导入(开发环境)
|
16
|
+
from parsing.java_parser import JavaParser
|
17
|
+
from parsing.enum_parser import EnumParser
|
18
|
+
|
19
|
+
__all__ = [
|
20
|
+
'JavaParser',
|
21
|
+
'EnumParser'
|
22
|
+
]
|
1
23
|
|
2
24
|
|
3
25
|
|
parsing/enum_parser.py
CHANGED
@@ -8,8 +8,16 @@
|
|
8
8
|
import re
|
9
9
|
import os
|
10
10
|
from typing import List, Optional, Dict, Tuple
|
11
|
-
|
12
|
-
|
11
|
+
|
12
|
+
# 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
|
13
|
+
try:
|
14
|
+
# 相对导入(包环境)
|
15
|
+
from ..models.message_definition import EnumDefinition, EnumValueDefinition
|
16
|
+
from ..utils.logger import get_logger
|
17
|
+
except ImportError:
|
18
|
+
# 绝对导入(开发环境)
|
19
|
+
from models.message_definition import EnumDefinition, EnumValueDefinition
|
20
|
+
from utils.logger import get_logger
|
13
21
|
|
14
22
|
|
15
23
|
class EnumParser:
|
parsing/java_parser.py
CHANGED
@@ -11,7 +11,13 @@ import re
|
|
11
11
|
from pathlib import Path
|
12
12
|
from typing import Optional, Tuple, List
|
13
13
|
|
14
|
-
|
14
|
+
# 智能导入:同时支持相对导入(包环境)和绝对导入(开发环境)
|
15
|
+
try:
|
16
|
+
# 相对导入(包环境)
|
17
|
+
from ..utils.logger import get_logger
|
18
|
+
except ImportError:
|
19
|
+
# 绝对导入(开发环境)
|
20
|
+
from utils.logger import get_logger
|
15
21
|
|
16
22
|
|
17
23
|
class JavaParser:
|
@@ -27,12 +33,13 @@ class JavaParser:
|
|
27
33
|
self.logger = get_logger("java_parser")
|
28
34
|
|
29
35
|
# 匹配newMessageInfo调用的正则表达式
|
30
|
-
#
|
36
|
+
# 格式1:GeneratedMessageLite.newMessageInfo(DEFAULT_INSTANCE, "字节码", new Object[]{对象数组})
|
37
|
+
# 格式2:GeneratedMessageLite.newMessageInfo(DEFAULT_INSTANCE, "字节码", null)
|
31
38
|
self.new_message_info_pattern = re.compile(
|
32
39
|
r'GeneratedMessageLite\.newMessageInfo\(\s*'
|
33
40
|
r'DEFAULT_INSTANCE\s*,\s*'
|
34
41
|
r'"([^"]*)",\s*' # 捕获字节码字符串
|
35
|
-
r'new\s+Object\[\]\s*\{([^}]*)\}', #
|
42
|
+
r'(?:new\s+Object\[\]\s*\{([^}]*)\}|null)', # 捕获对象数组或null
|
36
43
|
re.DOTALL
|
37
44
|
)
|
38
45
|
|
@@ -50,17 +57,26 @@ class JavaParser:
|
|
50
57
|
# 读取Java文件内容
|
51
58
|
content = java_file_path.read_text(encoding='utf-8')
|
52
59
|
|
53
|
-
#
|
54
|
-
|
55
|
-
if not
|
60
|
+
# 查找所有newMessageInfo调用
|
61
|
+
matches = self.new_message_info_pattern.findall(content)
|
62
|
+
if not matches:
|
56
63
|
return None, None
|
57
64
|
|
58
|
-
#
|
59
|
-
|
60
|
-
objects_str = match.group(2)
|
65
|
+
# 获取主类的字段标签
|
66
|
+
main_class_field_tags = self._extract_field_number_constants(content)
|
61
67
|
|
62
|
-
#
|
63
|
-
|
68
|
+
# 根据字段匹配选择正确的newMessageInfo调用
|
69
|
+
best_match = self._select_main_class_message_info(matches, main_class_field_tags)
|
70
|
+
if not best_match:
|
71
|
+
return None, None
|
72
|
+
|
73
|
+
info_string, objects_str = best_match
|
74
|
+
|
75
|
+
# 解析对象数组(允许null/空对象数组)
|
76
|
+
if objects_str and objects_str.strip():
|
77
|
+
objects_array = self._parse_objects_array(objects_str)
|
78
|
+
else:
|
79
|
+
objects_array = [] # 空消息的情况(null或空字符串)
|
64
80
|
|
65
81
|
return info_string, objects_array
|
66
82
|
|
@@ -68,6 +84,142 @@ class JavaParser:
|
|
68
84
|
self.logger.error(f"❌ 解析Java文件失败 {java_file_path}: {e}")
|
69
85
|
return None, None
|
70
86
|
|
87
|
+
def parse_inner_class_from_file(self, java_file_path: Path, inner_class_name: str) -> Tuple[Optional[str], Optional[List[str]]]:
    """
    Parse the protobuf message info of an inner class declared in an outer-class file.

    Args:
        java_file_path: Path of the Java file that holds the outer class.
        inner_class_name: Simple name of the inner class (e.g. "SkipRecovery").

    Returns:
        ``(info_string, objects_array)`` taken from the first ``newMessageInfo``
        call found inside the inner class body, or ``(None, None)`` when the
        class or the call cannot be found, or when any exception is raised
        (the exception is logged, not propagated).
    """
    not_found = (None, None)
    try:
        source_text = java_file_path.read_text(encoding='utf-8')

        # Restrict the search to the body of the requested inner class.
        class_body = self._extract_inner_class_content(source_text, inner_class_name)
        if not class_body:
            self.logger.error(f"❌ 在文件 {java_file_path} 中找不到内部类: {inner_class_name}")
            return not_found

        calls = self.new_message_info_pattern.findall(class_body)
        if not calls:
            self.logger.debug(f" 🔍 内部类 {inner_class_name} 中没有找到newMessageInfo调用")
            return not_found

        # Only the first call is used; later ones in the same body are ignored.
        info_string, raw_objects = calls[0]

        # A `null` argument or an empty `new Object[]{}` yields an empty list.
        has_objects = bool(raw_objects and raw_objects.strip())
        objects_array = self._parse_objects_array(raw_objects) if has_objects else []

        # Extract (and cache) the field tags that belong to this inner class only.
        self._extract_inner_class_field_tags(java_file_path, inner_class_name, class_body)

        self.logger.info(f" ✅ 成功解析内部类 {inner_class_name}: {len(objects_array)} 个对象")
        return info_string, objects_array

    except Exception as e:
        self.logger.error(f"❌ 解析内部类失败 {inner_class_name} from {java_file_path}: {e}")
        return not_found
|
132
|
+
|
133
|
+
def _extract_inner_class_content(self, content: str, inner_class_name: str) -> Optional[str]:
|
134
|
+
"""
|
135
|
+
从Java文件内容中提取指定内部类的内容
|
136
|
+
|
137
|
+
Args:
|
138
|
+
content: Java文件内容
|
139
|
+
inner_class_name: 内部类名
|
140
|
+
|
141
|
+
Returns:
|
142
|
+
内部类的内容,如果找不到则返回None
|
143
|
+
"""
|
144
|
+
# 查找内部类定义的开始
|
145
|
+
# 匹配模式:public static final class InnerClassName extends ...
|
146
|
+
class_pattern = rf'public\s+static\s+final\s+class\s+{re.escape(inner_class_name)}\s+extends\s+'
|
147
|
+
match = re.search(class_pattern, content)
|
148
|
+
|
149
|
+
if not match:
|
150
|
+
# 尝试更宽松的匹配
|
151
|
+
class_pattern = rf'class\s+{re.escape(inner_class_name)}\s+extends\s+'
|
152
|
+
match = re.search(class_pattern, content)
|
153
|
+
|
154
|
+
if not match:
|
155
|
+
return None
|
156
|
+
|
157
|
+
# 找到类定义的开始位置
|
158
|
+
class_start = match.start()
|
159
|
+
|
160
|
+
# 从类定义开始位置往前找到第一个'{'
|
161
|
+
content_from_class = content[class_start:]
|
162
|
+
brace_start = content_from_class.find('{')
|
163
|
+
if brace_start == -1:
|
164
|
+
return None
|
165
|
+
|
166
|
+
# 从第一个'{'开始,找到匹配的'}'
|
167
|
+
start_pos = class_start + brace_start + 1
|
168
|
+
brace_count = 1
|
169
|
+
pos = start_pos
|
170
|
+
|
171
|
+
while pos < len(content) and brace_count > 0:
|
172
|
+
if content[pos] == '{':
|
173
|
+
brace_count += 1
|
174
|
+
elif content[pos] == '}':
|
175
|
+
brace_count -= 1
|
176
|
+
pos += 1
|
177
|
+
|
178
|
+
if brace_count == 0:
|
179
|
+
# 找到了匹配的结束位置
|
180
|
+
inner_class_content = content[start_pos:pos-1]
|
181
|
+
return inner_class_content
|
182
|
+
|
183
|
+
return None
|
184
|
+
|
185
|
+
def _extract_inner_class_field_tags(self, java_file_path: Path, inner_class_name: str, inner_class_content: str) -> None:
    """
    Extract the field tags of an inner class and cache them under a virtual path.

    The tags are read from the inner class source only and stored in the
    in-memory cache via ``_cache_field_tags``. The cache key is a virtual
    file path built from the outer file's stem and the inner class name,
    e.g. ``Outer.java`` + ``Inner`` -> ``Outer$Inner.java`` in the same directory.

    Args:
        java_file_path: Path of the Java file that contains the inner class.
        inner_class_name: Simple name of the inner class.
        inner_class_content: Source text of the inner class body.
    """
    tags = self._extract_field_tags_from_source(inner_class_content)

    if not tags:
        self.logger.debug(f" 🔍 内部类 {inner_class_name} 没有字段标签")
        return

    # The virtual path need not exist on disk; it is used only as a cache key.
    virtual_name = f"{java_file_path.stem}${inner_class_name}.java"
    self._cache_field_tags(java_file_path.parent / virtual_name, tags)

    self.logger.debug(f" 🏷️ 为内部类 {inner_class_name} 提取了 {len(tags)} 个字段标签")
|
208
|
+
|
209
|
+
def _cache_field_tags(self, file_path: Path, field_tags: dict) -> None:
|
210
|
+
"""
|
211
|
+
缓存字段标签到内存中,供后续使用
|
212
|
+
|
213
|
+
Args:
|
214
|
+
file_path: 文件路径(可能是虚拟路径)
|
215
|
+
field_tags: 字段标签字典
|
216
|
+
"""
|
217
|
+
# 使用简单的内存缓存
|
218
|
+
if not hasattr(self, '_field_tags_cache'):
|
219
|
+
self._field_tags_cache = {}
|
220
|
+
|
221
|
+
self._field_tags_cache[str(file_path)] = field_tags
|
222
|
+
|
71
223
|
def _parse_objects_array(self, objects_str: str) -> List[str]:
|
72
224
|
"""
|
73
225
|
解析Java对象数组字符串
|
@@ -183,6 +335,85 @@ class JavaParser:
|
|
183
335
|
|
184
336
|
return part if part else None
|
185
337
|
|
338
|
+
def _select_main_class_message_info(self, matches: List[tuple], main_class_field_tags: dict) -> Optional[tuple]:
|
339
|
+
"""
|
340
|
+
根据字段匹配选择主类的newMessageInfo调用
|
341
|
+
|
342
|
+
Args:
|
343
|
+
matches: 所有newMessageInfo匹配结果 [(info_string, objects_str), ...]
|
344
|
+
main_class_field_tags: 主类字段标签 {const_name: tag_value}
|
345
|
+
|
346
|
+
Returns:
|
347
|
+
主类的newMessageInfo匹配结果或None
|
348
|
+
"""
|
349
|
+
if not matches:
|
350
|
+
return None
|
351
|
+
|
352
|
+
if len(matches) == 1:
|
353
|
+
return matches[0]
|
354
|
+
|
355
|
+
# 从主类字段标签生成期望的字段名列表
|
356
|
+
expected_fields = set()
|
357
|
+
for const_name in main_class_field_tags.keys():
|
358
|
+
field_name = self._const_name_to_field_name(const_name)
|
359
|
+
expected_fields.add(field_name)
|
360
|
+
|
361
|
+
self.logger.debug(f" 🔍 主类期望字段: {expected_fields}")
|
362
|
+
|
363
|
+
best_match = None
|
364
|
+
best_score = 0
|
365
|
+
|
366
|
+
for info_string, objects_str in matches:
|
367
|
+
# 解析对象数组(允许null/空对象数组)
|
368
|
+
if objects_str and objects_str.strip():
|
369
|
+
objects_array = self._parse_objects_array(objects_str)
|
370
|
+
else:
|
371
|
+
objects_array = [] # 空消息的情况(null或空字符串)
|
372
|
+
|
373
|
+
# 计算匹配分数
|
374
|
+
score = self._calculate_field_match_score(objects_array, expected_fields)
|
375
|
+
|
376
|
+
self.logger.debug(f" 📊 对象数组 {objects_array[:3]}... 匹配分数: {score}")
|
377
|
+
|
378
|
+
if score > best_score:
|
379
|
+
best_score = score
|
380
|
+
best_match = (info_string, objects_str)
|
381
|
+
|
382
|
+
if best_match:
|
383
|
+
self.logger.info(f" ✅ 选择主类newMessageInfo,匹配分数: {best_score}")
|
384
|
+
else:
|
385
|
+
self.logger.warning(f" ⚠️ 无法找到匹配的主类newMessageInfo")
|
386
|
+
|
387
|
+
return best_match
|
388
|
+
|
389
|
+
def _calculate_field_match_score(self, objects_array: List[str], expected_fields: set) -> int:
|
390
|
+
"""
|
391
|
+
计算对象数组与期望字段的匹配分数
|
392
|
+
|
393
|
+
Args:
|
394
|
+
objects_array: 解析后的对象数组
|
395
|
+
expected_fields: 期望的字段名集合
|
396
|
+
|
397
|
+
Returns:
|
398
|
+
匹配分数(匹配的字段数量)
|
399
|
+
"""
|
400
|
+
if not objects_array or not expected_fields:
|
401
|
+
return 0
|
402
|
+
|
403
|
+
match_count = 0
|
404
|
+
|
405
|
+
for obj in objects_array:
|
406
|
+
# 检查是否是字段名(以_结尾的字符串)
|
407
|
+
if obj.endswith('_'):
|
408
|
+
if obj in expected_fields:
|
409
|
+
match_count += 1
|
410
|
+
# 检查是否是类引用(不以_结尾,可能是oneof字段的类型)
|
411
|
+
elif not obj.endswith('_'):
|
412
|
+
# 类引用也算作有效匹配,但权重较低
|
413
|
+
match_count += 0.5
|
414
|
+
|
415
|
+
return int(match_count)
|
416
|
+
|
186
417
|
def parse_enum_file(self, java_file_path: Path) -> Optional[List[tuple]]:
|
187
418
|
"""
|
188
419
|
解析Java枚举文件,提取枚举值和数值
|
@@ -369,6 +600,18 @@ class JavaParser:
|
|
369
600
|
字段标签映射 {field_name: tag} 或 None 如果解析失败
|
370
601
|
"""
|
371
602
|
try:
|
603
|
+
# 首先检查是否有缓存的字段标签(用于内部类)
|
604
|
+
if hasattr(self, '_field_tags_cache'):
|
605
|
+
cache_key = str(java_file_path)
|
606
|
+
if cache_key in self._field_tags_cache:
|
607
|
+
self.logger.debug(f" 🎯 使用缓存的字段标签: {java_file_path}")
|
608
|
+
return self._field_tags_cache[cache_key]
|
609
|
+
|
610
|
+
# 检查文件是否存在(虚拟文件路径不存在)
|
611
|
+
if not java_file_path.exists():
|
612
|
+
self.logger.debug(f" 📁 文件不存在,跳过字段标签提取: {java_file_path}")
|
613
|
+
return None
|
614
|
+
|
372
615
|
# 读取Java文件内容
|
373
616
|
content = java_file_path.read_text(encoding='utf-8')
|
374
617
|
|
@@ -451,7 +694,7 @@ class JavaParser:
|
|
451
694
|
|
452
695
|
def _extract_field_number_constants(self, content: str) -> dict:
|
453
696
|
"""
|
454
|
-
|
697
|
+
提取主类的FIELD_NUMBER常量(排除内部类)
|
455
698
|
|
456
699
|
Args:
|
457
700
|
content: Java文件内容
|
@@ -459,19 +702,65 @@ class JavaParser:
|
|
459
702
|
Returns:
|
460
703
|
常量名到值的映射 {const_name: value}
|
461
704
|
"""
|
705
|
+
# 首先找到主类的定义范围
|
706
|
+
main_class_content = self._extract_main_class_content(content)
|
707
|
+
|
462
708
|
field_tag_pattern = re.compile(
|
463
709
|
r'\s*public\s+static\s+final\s+int\s+' # 允许行首有空白字符
|
464
710
|
r'([A-Z0-9_]+)_FIELD_NUMBER\s*=\s*(\d+)\s*;' # 允许常量名包含数字
|
465
711
|
)
|
466
712
|
|
467
713
|
constants = {}
|
468
|
-
for match in field_tag_pattern.finditer(
|
714
|
+
for match in field_tag_pattern.finditer(main_class_content):
|
469
715
|
const_name = match.group(1)
|
470
716
|
tag_value = int(match.group(2))
|
471
717
|
constants[const_name] = tag_value
|
472
718
|
|
473
719
|
return constants
|
474
720
|
|
721
|
+
def _extract_main_class_content(self, content: str) -> str:
|
722
|
+
"""
|
723
|
+
提取主类的内容,排除内部类定义
|
724
|
+
|
725
|
+
Args:
|
726
|
+
content: Java文件内容
|
727
|
+
|
728
|
+
Returns:
|
729
|
+
主类内容(不包括内部类)
|
730
|
+
"""
|
731
|
+
# 找到主类的开始位置
|
732
|
+
main_class_pattern = re.compile(
|
733
|
+
r'public\s+final\s+class\s+\w+(?:\$\w+)?\s+extends\s+GeneratedMessageLite.*?\{',
|
734
|
+
re.DOTALL
|
735
|
+
)
|
736
|
+
|
737
|
+
main_class_match = main_class_pattern.search(content)
|
738
|
+
if not main_class_match:
|
739
|
+
# 如果找不到主类定义,返回整个内容作为回退
|
740
|
+
return content
|
741
|
+
|
742
|
+
main_class_start = main_class_match.end()
|
743
|
+
|
744
|
+
# 找到第一个内部类的开始位置
|
745
|
+
inner_class_pattern = re.compile(
|
746
|
+
r'\n\s*public\s+(?:static\s+)?(?:final\s+)?class\s+\w+\s+extends\s+',
|
747
|
+
re.MULTILINE
|
748
|
+
)
|
749
|
+
|
750
|
+
# 从主类开始位置搜索内部类
|
751
|
+
content_from_main_class = content[main_class_start:]
|
752
|
+
inner_class_match = inner_class_pattern.search(content_from_main_class)
|
753
|
+
|
754
|
+
if inner_class_match:
|
755
|
+
# 如果找到内部类,只返回主类部分
|
756
|
+
inner_class_start = main_class_start + inner_class_match.start()
|
757
|
+
main_class_content = content[:inner_class_start]
|
758
|
+
else:
|
759
|
+
# 如果没有内部类,返回整个内容
|
760
|
+
main_class_content = content
|
761
|
+
|
762
|
+
return main_class_content
|
763
|
+
|
475
764
|
def _generate_possible_constant_names(self, field_name: str) -> List[str]:
|
476
765
|
"""
|
477
766
|
根据字段名生成可能的常量名
|
pyproject.toml
CHANGED