reproto 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (69)
  1. .git/COMMIT_EDITMSG +11 -26
  2. .git/FETCH_HEAD +2 -0
  3. .git/index +0 -0
  4. .git/logs/HEAD +6 -0
  5. .git/logs/refs/heads/iyue +6 -0
  6. .git/logs/refs/remotes/gitlab/iyue +6 -0
  7. .git/logs/refs/remotes/origin/iyue +6 -0
  8. .git/objects/15/eb3f02479e633439ec83c143e703f8448043a1 +0 -0
  9. .git/objects/26/1f67f3b731b32f6d77de9dd7be2d61e2a14ace +0 -0
  10. .git/objects/29/4708b82b343e04e7a6685f5cd1287f3d17f7a9 +0 -0
  11. .git/objects/2c/fcddd7eebeb4eee0562dc384a79366bc7b04bb +0 -0
  12. .git/objects/2e/2c1c42f5ac5d665cc672d3792078b756d9ab0e +0 -0
  13. .git/objects/34/192f0df5f5b694c881d086019eda349608d222 +0 -0
  14. .git/objects/35/8bace20b731ff1bbb256d2a0158dfc84720978 +0 -0
  15. .git/objects/3e/d953d5ed2c66722045f5e39d068696a0853b4f +0 -0
  16. .git/objects/44/4af3a0d68ba81976f67c7b638c9b0db4640709 +0 -0
  17. .git/objects/4b/43f5f577e1cdc35f8e06d178d8f5c892034061 +0 -0
  18. .git/objects/4c/16984be6ef7b24d01604821e9728cb579acd69 +0 -0
  19. .git/objects/4d/6d457bfabc4af842e5ddc2d56eb059d5dfdc9d +0 -0
  20. .git/objects/55/6723fdd4f525eed41c52fa80defca3f0c81c47 +0 -0
  21. .git/objects/56/79064a3138031d3a92d60aa629b82009fd0d1d +0 -0
  22. .git/objects/58/0d8f872aa869e42ba608f64888b1349f8b3ff4 +0 -0
  23. .git/objects/5a/11f9d70791e06a0570e01d3dcbbae39cde55db +0 -0
  24. .git/objects/5c/715dcb05e32db7a7a3b030f07524bdd8a56849 +0 -0
  25. .git/objects/5d/e2e9d536c2c0b78e8f9b3b61daa531a55332dc +0 -0
  26. .git/objects/65/a4f0ada7519f8b1e6a7c7e287541b8effde9fd +0 -0
  27. .git/objects/66/7291e131d4769e7d028346a0cc7a0c05d500e5 +0 -0
  28. .git/objects/67/f54a4a3ede6749acc7c718ad97a86634215b5e +0 -0
  29. .git/objects/70/ded03ee69f30850c938a8129be308cb30772d9 +0 -0
  30. .git/objects/78/3e7e252c20e73e33615c703174766036546ff6 +3 -0
  31. .git/objects/8d/44142ae2d6dbb59d4ebed8587bccd051e5766b +0 -0
  32. .git/objects/8d/4a5767bef0c342f1660526f9671c0944922c40 +0 -0
  33. .git/objects/94/ce01b61b90d1fb21d2d339fbeb22e821b6f413 +0 -0
  34. .git/objects/95/295a15779ebefd563ec777c3d3cced7e8d0209 +0 -0
  35. .git/objects/97/71dd4958faa94d3db229c129f6af35b508905a +0 -0
  36. .git/objects/9e/9978522bc8ca79133d7c11ef8ca3fe3c7eed0a +0 -0
  37. .git/objects/9f/b57064e0c53ed80af8507acaab718a1e80184e +0 -0
  38. .git/objects/b3/28dc445ee33220db9359370fc0089a77174101 +0 -0
  39. .git/objects/b3/d2b3037bede44e7e4d18dc99419f8c712c9c62 +0 -0
  40. .git/objects/c3/93db4841dbbb8acf54e9af12b6705c9f5ecde9 +0 -0
  41. .git/objects/c3/c8594874dd9ff9c21662fd06cac9b5baadbba0 +0 -0
  42. .git/objects/c9/a6ca8f8efee4a5632e9a655ced29f1b708f35e +0 -0
  43. .git/objects/c9/cdef9ab627b874ffe6455a47583a75bf16496d +0 -0
  44. .git/objects/db/beedb30613f79ae3ff67df1428cf8ade223711 +0 -0
  45. .git/objects/df/f8f64cee2b97df7d86f73207c5e690f98f0208 +0 -0
  46. .git/objects/eb/528f06c622d54e411e9e05b3a200b4ac624a90 +0 -0
  47. .git/objects/fc/e15b9dbffd9f37b1f2d46944ee2d0394df6565 +2 -0
  48. .git/objects/fd/267d9de63212db235135fa6834f62572224fc6 +0 -0
  49. .git/refs/heads/iyue +1 -1
  50. .git/refs/remotes/gitlab/iyue +1 -1
  51. .git/refs/remotes/origin/iyue +1 -1
  52. .gitignore +4 -1
  53. ARCHITECTURE.md +146 -48
  54. README.md +145 -24
  55. core/bytecode_parser.py +2 -5
  56. core/info_decoder.py +9 -83
  57. core/reconstructor.py +122 -200
  58. generation/proto_generator.py +19 -68
  59. parsing/java_parser.py +219 -46
  60. pyproject.toml +1 -1
  61. reproto-0.0.9.dist-info/METADATA +295 -0
  62. reproto-0.0.9.dist-info/RECORD +132 -0
  63. utils/file_cache.py +165 -0
  64. utils/type_index.py +341 -0
  65. utils/type_utils.py +414 -0
  66. reproto-0.0.7.dist-info/METADATA +0 -174
  67. reproto-0.0.7.dist-info/RECORD +0 -87
  68. {reproto-0.0.7.dist-info → reproto-0.0.9.dist-info}/WHEEL +0 -0
  69. {reproto-0.0.7.dist-info → reproto-0.0.9.dist-info}/entry_points.txt +0 -0
core/reconstructor.py CHANGED
@@ -12,7 +12,7 @@ Author: AI Assistant
12
12
  import re
13
13
  from pathlib import Path
14
14
  from collections import deque
15
- from typing import Set, Dict, List, Optional
15
+ from typing import Set, Dict, List, Optional, Tuple
16
16
 
17
17
  from parsing.java_parser import JavaParser
18
18
  from parsing.enum_parser import EnumParser
@@ -20,6 +20,8 @@ from core.info_decoder import InfoDecoder
20
20
  from generation.proto_generator import ProtoGenerator
21
21
  from models.message_definition import MessageDefinition, EnumDefinition, EnumValueDefinition
22
22
  from utils.logger import get_logger
23
+ from utils.file_cache import get_file_cache
24
+ from utils.type_utils import type_mapper, naming_converter
23
25
 
24
26
 
25
27
  class JavaSourceAnalyzer:
@@ -31,6 +33,8 @@ class JavaSourceAnalyzer:
31
33
  self._current_class_name = None
32
34
  # 初始化JavaParser用于字段类型解析
33
35
  self.java_parser = JavaParser()
36
+ # 使用文件缓存系统优化I/O性能
37
+ self.file_cache = get_file_cache()
34
38
 
35
39
  def set_current_class(self, class_name: str):
36
40
  """设置当前分析的类"""
@@ -183,41 +187,9 @@ class JavaSourceAnalyzer:
183
187
  java_type: Java类型名
184
188
 
185
189
  Returns:
186
- protobuf类型名
190
+ 对应的protobuf类型名
187
191
  """
188
- if not java_type:
189
- return 'string'
190
-
191
- # 基础类型映射
192
- basic_types = {
193
- 'int': 'int32',
194
- 'long': 'int64',
195
- 'float': 'float',
196
- 'double': 'double',
197
- 'boolean': 'bool',
198
- 'String': 'string',
199
- 'java.lang.String': 'string',
200
- 'java.lang.Integer': 'int32',
201
- 'java.lang.Long': 'int64',
202
- 'java.lang.Float': 'float',
203
- 'java.lang.Double': 'double',
204
- 'java.lang.Boolean': 'bool',
205
- 'byte[]': 'bytes',
206
- 'ByteString': 'bytes',
207
- 'com.google.protobuf.ByteString': 'bytes',
208
- }
209
-
210
- # 检查是否为基础类型
211
- if java_type in basic_types:
212
- return basic_types[java_type]
213
-
214
- # 如果是完整的类名,提取简单类名
215
- if '.' in java_type:
216
- simple_name = java_type.split('.')[-1]
217
- return simple_name
218
-
219
- # 默认返回原类型名
220
- return java_type
192
+ return type_mapper.java_to_proto_type(java_type)
221
193
 
222
194
  def _get_type_from_setter(self, field_name: str) -> Optional[str]:
223
195
  """
@@ -295,19 +267,21 @@ class JavaSourceAnalyzer:
295
267
  return None
296
268
 
297
269
  def _load_class_content(self, class_name: str) -> Optional[str]:
298
- """加载类的源码内容"""
270
+ """加载类的源码内容(使用缓存优化)"""
299
271
  try:
300
272
  # 标准路径:com.example.Model -> com/example/Model.java
301
273
  file_path = class_name.replace('.', '/') + '.java'
302
274
  full_path = self.sources_dir / file_path
303
275
 
304
- if full_path.exists():
305
- return full_path.read_text(encoding='utf-8')
276
+ # 使用缓存系统获取文件内容
277
+ content = self.file_cache.get_content(full_path)
278
+ if content:
279
+ return content
306
280
 
307
281
  # 备选方案:按简单类名搜索
308
282
  simple_name = class_name.split('.')[-1]
309
283
  for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
310
- return java_file.read_text(encoding='utf-8')
284
+ return self.file_cache.get_content(java_file)
311
285
 
312
286
  return None
313
287
  except Exception:
@@ -346,6 +320,10 @@ class ProtoReconstructor:
346
320
  self.java_source_analyzer = JavaSourceAnalyzer(sources_dir)
347
321
  self.info_decoder.java_source_analyzer = self.java_source_analyzer
348
322
 
323
+ # 🚀 性能优化:移除未使用的索引系统,简化代码
324
+ # 索引系统在实际使用中被基础类型检测绕过,且构建耗时
325
+ # 改为使用直接的文件路径构造和智能包名推断
326
+
349
327
  # 任务调度状态
350
328
  self.processed_classes: Set[str] = set() # 已处理的类
351
329
  self.pending_classes: deque = deque() # 待处理的类队列
@@ -383,7 +361,14 @@ class ProtoReconstructor:
383
361
  # 4. 生成proto文件
384
362
  self._generate_all_proto_files()
385
363
 
386
- # 5. 返回统计信息
364
+ # 5. 输出性能统计信息
365
+ from utils.file_cache import get_file_cache
366
+ file_cache = get_file_cache()
367
+ file_cache.print_stats()
368
+
369
+ # 🚀 性能优化:索引系统已移除,无需统计
370
+
371
+ # 6. 返回统计信息
387
372
  # 报告未知类型统计
388
373
  self._report_unknown_types()
389
374
 
@@ -616,7 +601,7 @@ class ProtoReconstructor:
616
601
 
617
602
  def _should_skip_class(self, class_name: str) -> bool:
618
603
  """
619
- 判断是否应该跳过某个类
604
+ 判断是否应该跳过某个类的处理
620
605
 
621
606
  Args:
622
607
  class_name: 类名
@@ -624,27 +609,15 @@ class ProtoReconstructor:
624
609
  Returns:
625
610
  是否应该跳过
626
611
  """
627
- # 跳过已经处理过的类
612
+ # 已处理过的类
628
613
  if class_name in self.processed_classes:
629
614
  return True
630
-
631
- # 跳过基础类型(包括Java基础类型和常见的系统类型)
632
- basic_types = {
633
- # Java基础类型
634
- 'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
635
- 'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
636
- 'Byte', 'Short', 'Character',
637
- # Java系统类型
638
- 'java.lang.String', 'java.lang.Integer', 'java.lang.Long',
639
- 'java.lang.Boolean', 'java.lang.Float', 'java.lang.Double',
640
- 'java.lang.Object', 'java.util.List', 'java.util.Map',
641
- 'com.google.protobuf.ByteString', 'com.google.protobuf.MessageLite'
642
- }
643
615
 
644
- if class_name in basic_types:
616
+ # 使用TypeMapper判断基础类型和系统包
617
+ if type_mapper.is_java_basic_type(class_name) or type_mapper.is_system_package(class_name):
645
618
  return True
646
619
 
647
- # 跳过明显的系统类型和内部类型
620
+ # 跳过明显不是protobuf类的包
648
621
  if self._is_system_or_internal_type(class_name):
649
622
  return True
650
623
 
@@ -747,12 +720,7 @@ class ProtoReconstructor:
747
720
  return None
748
721
 
749
722
  # 检查是否为基础类型
750
- basic_proto_types = {
751
- 'string', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
752
- 'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'float', 'double', 'bytes'
753
- }
754
-
755
- if type_name in basic_proto_types:
723
+ if type_mapper.is_basic_proto_type(type_name):
756
724
  return None
757
725
 
758
726
  # 如果已经是完整类名,直接返回
@@ -777,7 +745,7 @@ class ProtoReconstructor:
777
745
 
778
746
  def _find_java_file(self, class_name: str) -> Optional[Path]:
779
747
  """
780
- 根据类名查找对应的Java文件
748
+ 根据类名查找对应的Java文件(优化版本)
781
749
 
782
750
  Args:
783
751
  class_name: 完整的Java类名
@@ -785,6 +753,7 @@ class ProtoReconstructor:
785
753
  Returns:
786
754
  Java文件路径,如果找不到则返回None
787
755
  """
756
+ # 🚀 优化1:直接根据包名和类名构造文件路径(你的建议)
788
757
  # 标准路径:com.example.Model -> com/example/Model.java
789
758
  file_path = class_name.replace('.', '/') + '.java'
790
759
  full_path = self.sources_dir / file_path
@@ -792,31 +761,43 @@ class ProtoReconstructor:
792
761
  if full_path.exists():
793
762
  return full_path
794
763
 
795
- # 处理内部类:支持多层嵌套
796
- # com.example.Models$Inner$Deep -> com/example/Models$Inner$Deep.java
764
+ # 🚀 优化2:处理内部类,但避免全目录扫描
797
765
  if '$' in class_name:
798
- # 找到最后一个.的位置,分离包名和类名部分
766
+ # 内部类处理:com.example.Models$Inner -> com/example/Models.java
799
767
  last_dot_index = class_name.rfind('.')
800
768
  if last_dot_index != -1:
801
- package_path = class_name[:last_dot_index].replace('.', '/') # 包路径
802
- class_part = class_name[last_dot_index + 1:] # 类名部分(可能包含多个$)
803
- inner_class_file_path = f"{package_path}/{class_part}.java"
804
- inner_class_full_path = self.sources_dir / inner_class_file_path
769
+ package_path = class_name[:last_dot_index].replace('.', '/')
770
+ class_part = class_name[last_dot_index + 1:]
805
771
 
806
- if inner_class_full_path.exists():
807
- return inner_class_full_path
808
-
809
- # 备选方案:按简单类名搜索
810
- simple_name = class_name.split('.')[-1]
811
- # 对于内部类,简单名称可能包含多个$符号
812
- if '$' in simple_name:
813
- # 对于内部类,直接使用包含$的完整文件名搜索
814
- for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
815
- return java_file
816
- else:
817
- # 对于普通类,使用原来的逻辑
818
- for java_file in self.sources_dir.rglob(f"{simple_name}.java"):
819
- return java_file
772
+ # 提取外部类名($之前的部分)
773
+ outer_class = class_part.split('$')[0]
774
+ outer_class_file_path = f"{package_path}/{outer_class}.java"
775
+ outer_class_full_path = self.sources_dir / outer_class_file_path
776
+
777
+ if outer_class_full_path.exists():
778
+ return outer_class_full_path
779
+
780
+ # 🚀 优化3:简化文件查找逻辑,移除索引依赖
781
+
782
+ # 🚀 优化4:最后的备选方案 - 限制搜索范围
783
+ # 只在当前包及其父包中搜索,避免全目录扫描
784
+ package_parts = class_name.split('.')[:-1] # 获取包名部分
785
+ simple_name = class_name.split('.')[-1].split('$')[0] # 提取简单类名
786
+
787
+ # 构造搜索路径列表,限制搜索范围
788
+ search_paths = []
789
+ for i in range(len(package_parts), 0, -1):
790
+ package_path = '/'.join(package_parts[:i])
791
+ search_paths.append(self.sources_dir / package_path)
792
+
793
+ # 在限定范围内搜索
794
+ for search_path in search_paths:
795
+ if search_path.exists():
796
+ for java_file in search_path.rglob(f"{simple_name}.java"):
797
+ # 验证找到的文件是否匹配
798
+ relative_path = java_file.relative_to(self.sources_dir)
799
+ if relative_path.stem == simple_name:
800
+ return java_file
820
801
 
821
802
  return None
822
803
 
@@ -840,7 +821,7 @@ class ProtoReconstructor:
840
821
  return candidate
841
822
 
842
823
  return None
843
-
824
+
844
825
  def _generate_candidate_packages(self, current_package: str) -> List[str]:
845
826
  """
846
827
  动态生成候选包名列表
@@ -913,7 +894,7 @@ class ProtoReconstructor:
913
894
 
914
895
  def _find_best_matching_class(self, type_name: str, current_package: str, current_class: str = None) -> Optional[str]:
915
896
  """
916
- 查找最佳匹配的类(用于处理推断失败的情况)
897
+ 查找最佳匹配的类(高性能版本)
917
898
 
918
899
  Args:
919
900
  type_name: 类型名(如 IdData)
@@ -923,19 +904,33 @@ class ProtoReconstructor:
923
904
  Returns:
924
905
  最佳匹配的完整类名
925
906
  """
926
- # 首先尝试从当前类的Java源码中获取实际类型
927
- if current_class:
928
- actual_type = self._extract_actual_field_type(current_class, type_name)
929
- if actual_type:
930
- self.logger.info(f" 🔍 源码分析: {type_name} -> {actual_type}")
931
- return actual_type
932
-
933
- # 预检查:如果是基础字段名,可能不需要创建单独的类
907
+ # 🚀 性能优化:优先进行基础类型检测,避免不必要的文件IO
934
908
  if self._is_basic_field_type(type_name, current_class):
935
- self.logger.info(f" 🔍 基础字段类型检测: {type_name} -> 跳过类匹配")
909
+ self.logger.debug(f" 🔍 基础字段类型检测: {type_name} -> 跳过类匹配")
936
910
  return None
937
911
 
938
- # 如果源码分析失败,回退到模糊匹配
912
+ # 🚀 性能优化:直接使用智能包名推断,避免索引开销
913
+ # 1. 首先尝试推断完整类名
914
+ inferred_name = self._infer_full_class_name(type_name, current_package)
915
+ if inferred_name:
916
+ self.logger.info(f" 🔍 包名推断: {type_name} -> {inferred_name}")
917
+ return inferred_name
918
+
919
+ # 2. 如果推断失败,使用限制范围的目录搜索
920
+ self.logger.debug(f" 🔍 启用目录搜索: {type_name}")
921
+ return self._fallback_directory_search(type_name, current_package)
922
+
923
+ def _fallback_directory_search(self, type_name: str, current_package: str) -> Optional[str]:
924
+ """
925
+ 回退的目录扫描方法(当索引匹配失败时使用)
926
+
927
+ Args:
928
+ type_name: 类型名
929
+ current_package: 当前包名
930
+
931
+ Returns:
932
+ 匹配的类名或None
933
+ """
939
934
  matching_classes = []
940
935
 
941
936
  # 在源码目录中搜索
@@ -961,12 +956,12 @@ class ProtoReconstructor:
961
956
  matching_classes.sort(key=lambda x: x[1], reverse=True)
962
957
  best_match = matching_classes[0][0]
963
958
 
964
- self.logger.info(f" 🔍 智能匹配: {type_name} -> {best_match}")
959
+ self.logger.info(f" 🔍 目录扫描匹配: {type_name} -> {best_match}")
965
960
  return best_match
966
961
 
967
962
  def _is_basic_field_type(self, type_name: str, current_class: str = None) -> bool:
968
963
  """
969
- 检查是否为基础字段类型,避免为简单字段创建不必要的类
964
+ 快速检查是否为基础字段类型(高性能版本)
970
965
 
971
966
  Args:
972
967
  type_name: 类型名
@@ -975,61 +970,11 @@ class ProtoReconstructor:
975
970
  Returns:
976
971
  是否为基础字段类型
977
972
  """
978
- # 首先检查是否为Java基础类型
979
- basic_java_types = {
980
- 'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
981
- 'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
982
- 'Byte', 'Short', 'Character'
983
- }
984
-
985
- if type_name in basic_java_types:
986
- return True
987
-
988
- # 常见的基础字段名模式
989
- basic_patterns = [
990
- 'tags', # tags_ 字段通常是 repeated string
991
- 'ids', # ids_ 字段通常是 repeated string 或 repeated int64
992
- 'values', # values_ 字段通常是基础类型数组
993
- 'names', # names_ 字段通常是 repeated string
994
- 'urls', # urls_ 字段通常是 repeated string
995
- 'emails', # emails_ 字段通常是 repeated string
996
- ]
997
-
998
- type_lower = type_name.lower()
999
-
1000
- # 检查是否匹配基础模式
1001
- if type_lower in basic_patterns:
1002
- return True
1003
-
1004
- # 如果有当前类,尝试从Java源码中验证
1005
- if current_class:
1006
- try:
1007
- java_file = self._find_java_file(current_class)
1008
- if java_file:
1009
- content = java_file.read_text(encoding='utf-8')
1010
-
1011
- # 查找对应的字段声明,检查是否为基础类型
1012
- field_name_pattern = type_lower.rstrip('s') + 's?_' # tags -> tags?_
1013
- import re
1014
-
1015
- # 查找字段声明:private List<String> tags_; 或 private Internal.ProtobufList<String> tags_;
1016
- patterns = [
1017
- rf'private\s+(?:Internal\.)?ProtobufList<String>\s+{field_name_pattern}',
1018
- rf'private\s+List<String>\s+{field_name_pattern}',
1019
- rf'private\s+(?:Internal\.)?ProtobufList<Integer>\s+{field_name_pattern}',
1020
- rf'private\s+List<Integer>\s+{field_name_pattern}',
1021
- rf'private\s+(?:Internal\.)?ProtobufList<Long>\s+{field_name_pattern}',
1022
- rf'private\s+List<Long>\s+{field_name_pattern}',
1023
- ]
1024
-
1025
- for pattern in patterns:
1026
- if re.search(pattern, content, re.IGNORECASE):
1027
- return True
1028
-
1029
- except Exception as e:
1030
- self.logger.debug(f" 检查基础字段类型时出错: {e}")
973
+ # 🚀 性能优化:使用缓存的类型检查器,避免重复计算
974
+ from utils.type_utils import TypeMapper
1031
975
 
1032
- return False
976
+ # 直接使用统一的基础类型检查,无需额外逻辑
977
+ return TypeMapper.is_java_basic_type(type_name)
1033
978
 
1034
979
  def _is_valid_package_for_matching(self, candidate_package: str, current_package: str) -> bool:
1035
980
  """
@@ -1100,7 +1045,7 @@ class ProtoReconstructor:
1100
1045
 
1101
1046
  def _extract_actual_field_type(self, class_name: str, inferred_type: str) -> Optional[str]:
1102
1047
  """
1103
- 从Java源码中提取字段的实际类型
1048
+ 从Java源码中提取字段的实际类型(优化版本)
1104
1049
 
1105
1050
  Args:
1106
1051
  class_name: 当前类的完整名称
@@ -1109,57 +1054,36 @@ class ProtoReconstructor:
1109
1054
  Returns:
1110
1055
  实际的完整类型名
1111
1056
  """
1112
- # 首先检查是否为基础类型,如果是则直接跳过
1113
- basic_types = {
1114
- 'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
1115
- 'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
1116
- 'Byte', 'Short', 'Character'
1117
- }
1057
+ # 🚀 优化:使用统一的类型检查器
1058
+ from utils.type_utils import TypeMapper
1118
1059
 
1119
- if inferred_type in basic_types:
1060
+ if TypeMapper.is_java_basic_type(inferred_type):
1120
1061
  self.logger.debug(f" 跳过基础类型: {inferred_type}")
1121
1062
  return None
1122
1063
 
1064
+ # 🚀 性能优化:简化源码分析,避免复杂的正则表达式匹配
1065
+ # 对于大多数情况,索引系统已经能够提供足够准确的匹配
1066
+ # 这里只做最基本的检查,避免耗时的文件IO和正则匹配
1067
+
1123
1068
  try:
1124
- java_file = self._find_java_file(class_name)
1125
- if not java_file:
1126
- return None
1127
-
1128
- # 读取Java源码
1129
- content = java_file.read_text(encoding='utf-8')
1069
+ # 使用索引系统进行快速查找,避免文件IO
1070
+ from utils.type_index import get_type_index
1071
+ type_index = get_type_index(self.sources_dir)
1130
1072
 
1131
- # 查找字段声明模式:private SomeType fieldName_;
1132
- # 我们要找的是以inferred_type结尾的类型声明
1133
- import re
1073
+ # 构造可能的完整类名
1074
+ package_name = '.'.join(class_name.split('.')[:-1])
1075
+ possible_full_name = f"{package_name}.{inferred_type}"
1134
1076
 
1135
- # 匹配模式:private (.*IdData) .*_;
1136
- pattern = rf'private\s+(\w*{re.escape(inferred_type)})\s+\w+_;'
1137
- matches = re.findall(pattern, content)
1138
-
1139
- if matches:
1140
- # 取第一个匹配的类型
1141
- actual_type_simple = matches[0]
1142
-
1143
- # 再次检查匹配的类型是否为基础类型
1144
- if actual_type_simple in basic_types:
1145
- self.logger.debug(f" 匹配到基础类型,跳过: {actual_type_simple}")
1146
- return None
1147
-
1148
- # 检查是否有import语句
1149
- import_pattern = rf'import\s+([^;]*\.{re.escape(actual_type_simple)});'
1150
- import_matches = re.findall(import_pattern, content)
1151
-
1152
- if import_matches:
1153
- return import_matches[0] # 返回完整的包名.类名
1154
- else:
1155
- # 如果没有import,假设在同一个包中
1156
- package_name = '.'.join(class_name.split('.')[:-1])
1157
- return f"{package_name}.{actual_type_simple}"
1077
+ # 使用索引快速检查
1078
+ result = type_index.find_best_match(inferred_type, package_name)
1079
+ if result:
1080
+ self.logger.debug(f" 索引快速匹配: {inferred_type} -> {result}")
1081
+ return result
1158
1082
 
1159
1083
  return None
1160
1084
 
1161
1085
  except Exception as e:
1162
- self.logger.error(f" ⚠️ 源码分析失败: {e}")
1086
+ self.logger.debug(f" ⚠️ 快速类型匹配失败: {e}")
1163
1087
  return None
1164
1088
 
1165
1089
  def _create_enum_definition(self, class_name: str, enum_values: List[tuple]) -> EnumDefinition:
@@ -1292,7 +1216,7 @@ class ProtoReconstructor:
1292
1216
  @staticmethod
1293
1217
  def _to_snake_case(camel_str: str) -> str:
1294
1218
  """
1295
- 将CamelCase转换为snake_case
1219
+ 将CamelCase转换为snake_case(使用统一的命名转换器)
1296
1220
 
1297
1221
  Args:
1298
1222
  camel_str: 驼峰命名字符串
@@ -1300,8 +1224,6 @@ class ProtoReconstructor:
1300
1224
  Returns:
1301
1225
  蛇形命名字符串
1302
1226
  """
1303
- # 处理连续大写字母:XMLParser -> XML_Parser
1304
- s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', camel_str)
1305
- # 处理小写字母后跟大写字母:userId -> user_Id
1306
- s2 = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1)
1307
- return s2.lower()
1227
+ # 🚀 优化:使用统一的命名转换器,避免重复实现
1228
+ from utils.type_utils import NamingConverter
1229
+ return NamingConverter.to_snake_case(camel_str)
generation/proto_generator.py CHANGED
@@ -11,6 +11,7 @@ Author: AI Assistant
11
11
  import re
12
12
  from typing import Dict, Set, List, Union
13
13
  from models.message_definition import MessageDefinition, FieldDefinition, EnumDefinition, EnumValueDefinition
14
+ from utils.type_utils import type_mapper, naming_converter, field_name_processor
14
15
 
15
16
 
16
17
  class ProtoGenerator:
@@ -158,12 +159,13 @@ class ProtoGenerator:
158
159
  """
159
160
  lines = [f'message {message_def.name} {{']
160
161
 
161
- # 生成oneof字段
162
+ # 生成oneof字段(oneof字段内部也按tag排序)
162
163
  for oneof in message_def.oneofs:
163
164
  lines.extend(self._generate_oneof_definition(oneof))
164
165
 
165
- # 生成常规字段
166
- for field in message_def.fields:
166
+ # 生成常规字段(按tag排序)
167
+ sorted_fields = sorted(message_def.fields, key=lambda field: field.tag)
168
+ for field in sorted_fields:
167
169
  lines.append(self._generate_field_definition(field))
168
170
 
169
171
  lines.append('}')
@@ -181,19 +183,24 @@ class ProtoGenerator:
181
183
  """
182
184
  lines = [f'enum {enum_def.name} {{']
183
185
 
184
- # 生成枚举值
185
- for enum_value in enum_def.values:
186
+ # 生成枚举值(按value排序)
187
+ sorted_values = sorted(enum_def.values, key=lambda enum_value: enum_value.value)
188
+ for enum_value in sorted_values:
186
189
  lines.append(f' {enum_value.name} = {enum_value.value};')
187
190
 
188
191
  lines.append('}')
189
192
  return lines
190
193
 
191
194
  def _generate_oneof_definition(self, oneof) -> List[str]:
192
- """生成oneof字段定义"""
195
+ """生成oneof字段定义(字段按tag排序)"""
193
196
  lines = [f' oneof {oneof.name} {{']
194
- for field in oneof.fields:
197
+
198
+ # 对oneof内部的字段按tag排序
199
+ sorted_fields = sorted(oneof.fields, key=lambda field: field.tag)
200
+ for field in sorted_fields:
195
201
  field_type = self._resolve_field_type(field)
196
202
  lines.append(f' {field_type} {field.name} = {field.tag};')
203
+
197
204
  lines.append(' }')
198
205
  return lines
199
206
 
@@ -404,16 +411,7 @@ class ProtoGenerator:
404
411
  Returns:
405
412
  基础proto类型,如果不是基础类型则返回None
406
413
  """
407
- basic_type_mapping = {
408
- 'string': 'string',
409
- 'int32': 'int32',
410
- 'int64': 'int64',
411
- 'bool': 'bool',
412
- 'float': 'float',
413
- 'double': 'double',
414
- 'bytes': 'bytes',
415
- }
416
- return basic_type_mapping.get(type_name)
414
+ return type_mapper.java_to_proto_type(type_name) if type_mapper.is_java_basic_type(type_name) else None
417
415
 
418
416
  def _generate_enum_type_name(self, field_name: str) -> str:
419
417
  """
@@ -425,33 +423,7 @@ class ProtoGenerator:
425
423
  Returns:
426
424
  枚举类型名(PascalCase)
427
425
  """
428
- name = field_name.rstrip('_')
429
-
430
- # 特殊字段名修正
431
- field_name_corrections = {
432
- 'access': 'Access', # 修正拼写
433
- }
434
-
435
- if name in field_name_corrections:
436
- return field_name_corrections[name]
437
-
438
- # 处理常见的枚举后缀
439
- suffix_mappings = {
440
- '_type': 'Type',
441
- '_status': 'Status',
442
- '_code': 'Code'
443
- }
444
-
445
- for suffix, replacement in suffix_mappings.items():
446
- if name.endswith(suffix):
447
- name = name[:-len(suffix)] + replacement
448
- break
449
-
450
- # 处理复数形式:badges -> badge
451
- if name.endswith('s') and len(name) > 1:
452
- name = name[:-1]
453
-
454
- return self._to_pascal_case(name)
426
+ return field_name_processor.generate_type_name_from_field(field_name, 'enum')
455
427
 
456
428
  def _generate_message_type_name(self, field_name: str) -> str:
457
429
  """
@@ -463,23 +435,7 @@ class ProtoGenerator:
463
435
  Returns:
464
436
  消息类型名(PascalCase)
465
437
  """
466
- name = field_name.rstrip('_')
467
-
468
- # 处理常见的消息后缀
469
- suffix_mappings = {
470
- '_info': 'Info',
471
- '_data': 'Data',
472
- '_stats': 'Stats',
473
- '_profile': 'Profile',
474
- '_config': 'Config'
475
- }
476
-
477
- for suffix, replacement in suffix_mappings.items():
478
- if name.endswith(suffix):
479
- name = name[:-len(suffix)] + replacement
480
- break
481
-
482
- return self._to_pascal_case(name)
438
+ return field_name_processor.generate_type_name_from_field(field_name, 'message')
483
439
 
484
440
  @staticmethod
485
441
  def _to_pascal_case(snake_str: str) -> str:
@@ -492,8 +448,7 @@ class ProtoGenerator:
492
448
  Returns:
493
449
  帕斯卡命名字符串
494
450
  """
495
- components = snake_str.split('_')
496
- return ''.join(word.capitalize() for word in components)
451
+ return naming_converter.to_pascal_case(snake_str)
497
452
 
498
453
  @staticmethod
499
454
  def _to_snake_case(camel_str: str) -> str:
@@ -506,8 +461,4 @@ class ProtoGenerator:
506
461
  Returns:
507
462
  蛇形命名字符串
508
463
  """
509
- # 处理连续大写字母:XMLParser -> XML_Parser
510
- s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', camel_str)
511
- # 处理小写字母后跟大写字母:userId -> user_Id
512
- s2 = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1)
513
- return s2.lower()
464
+ return naming_converter.to_snake_case(camel_str)