reproto 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. .git/COMMIT_EDITMSG +1 -1
  2. .git/index +0 -0
  3. .git/logs/HEAD +3 -0
  4. .git/logs/refs/heads/iyue +3 -0
  5. .git/logs/refs/remotes/gitlab/iyue +3 -0
  6. .git/logs/refs/remotes/origin/iyue +3 -0
  7. .git/objects/15/eb3f02479e633439ec83c143e703f8448043a1 +0 -0
  8. .git/objects/20/cf56ec106bcd66420dd000279f983571b918b6 +0 -0
  9. .git/objects/21/55b64d52922c88527c102d62f23e5c2abbae79 +0 -0
  10. .git/objects/26/1f67f3b731b32f6d77de9dd7be2d61e2a14ace +0 -0
  11. .git/objects/2e/2c1c42f5ac5d665cc672d3792078b756d9ab0e +0 -0
  12. .git/objects/33/52dfa8f5d9eb46cc98ea7ccecf02e4d9df95f7 +0 -0
  13. .git/objects/35/8bace20b731ff1bbb256d2a0158dfc84720978 +0 -0
  14. .git/objects/3c/6f0120229cc2cd8123efbeb7f186eb0a485f29 +0 -0
  15. .git/objects/4d/6d457bfabc4af842e5ddc2d56eb059d5dfdc9d +0 -0
  16. .git/objects/55/6723fdd4f525eed41c52fa80defca3f0c81c47 +0 -0
  17. .git/objects/65/a4f0ada7519f8b1e6a7c7e287541b8effde9fd +0 -0
  18. .git/objects/76/311aa8e59d780763e0d66787067cc5d9613a67 +0 -0
  19. .git/objects/8c/809c42c7ae13007fd885ee7bcffae7acf2c520 +0 -0
  20. .git/objects/8d/44142ae2d6dbb59d4ebed8587bccd051e5766b +0 -0
  21. .git/objects/8d/4a5767bef0c342f1660526f9671c0944922c40 +0 -0
  22. .git/objects/95/295a15779ebefd563ec777c3d3cced7e8d0209 +0 -0
  23. .git/objects/97/56fe0931216a7c40cbf250e1ab8a6dfd589f13 +0 -0
  24. .git/objects/9a/e313cdf64cd82416c1238eb493e6396f799f12 +0 -0
  25. .git/objects/cd/2d6c229438c6b1c694b9392a85888d89ef49c1 +0 -0
  26. .git/objects/db/beedb30613f79ae3ff67df1428cf8ade223711 +0 -0
  27. .git/objects/e8/1433b6ad92206cdadbee1f474b4f99383314cb +0 -0
  28. .git/objects/e9/a15996cb55ac72aeb6611d26e8d22246589943 +0 -0
  29. .git/objects/f7/25a430eb3364460ba854dbc8809edc21dc6c70 +0 -0
  30. .git/objects/fc/e15b9dbffd9f37b1f2d46944ee2d0394df6565 +2 -0
  31. .git/refs/heads/iyue +1 -1
  32. .git/refs/remotes/gitlab/iyue +1 -1
  33. .git/refs/remotes/origin/iyue +1 -1
  34. README.md +36 -116
  35. core/info_decoder.py +512 -105
  36. core/reconstructor.py +645 -84
  37. generation/proto_generator.py +38 -12
  38. main.py +36 -5
  39. parsing/java_parser.py +81 -1
  40. pyproject.toml +13 -2
  41. {reproto-0.0.6.dist-info → reproto-0.0.8.dist-info}/METADATA +46 -119
  42. {reproto-0.0.6.dist-info → reproto-0.0.8.dist-info}/RECORD +46 -20
  43. utils/file_cache.py +165 -0
  44. utils/type_index.py +341 -0
  45. {reproto-0.0.6.dist-info → reproto-0.0.8.dist-info}/WHEEL +0 -0
  46. {reproto-0.0.6.dist-info → reproto-0.0.8.dist-info}/entry_points.txt +0 -0
utils/file_cache.py ADDED
@@ -0,0 +1,165 @@
1
+ """
2
+ 文件内容缓存系统
3
+
4
+ 用于缓存Java源码文件内容,避免重复的文件I/O操作
5
+ 这是解决reproto性能瓶颈的核心优化组件
6
+
7
+ Author: AI Assistant
8
+ """
9
+
10
+ from pathlib import Path
11
+ from typing import Optional, Dict
12
+ import threading
13
+ from utils.logger import get_logger
14
+
15
+
16
class FileContentCache:
    """
    Thread-safe in-memory cache of text file contents.

    A file is read from disk the first time it is requested; later requests
    for the same resolved path are answered from memory. Hit, miss and error
    counters are maintained for diagnostics.
    """

    # A debug-level summary is logged each time this many requests were served.
    _STATS_LOG_INTERVAL = 50

    def __init__(self):
        """Create an empty cache with zeroed counters."""
        # Resolved absolute path (as str) -> decoded file content.
        self._cache: Dict[str, str] = {}
        self._stats = {
            'hits': 0,
            'misses': 0,
            'errors': 0
        }
        # Re-entrant so helpers may take the lock while a caller holds it.
        self._lock = threading.RLock()
        self.logger = get_logger("file_cache")

    def get_content(self, file_path: Path) -> Optional[str]:
        """
        Return the text of ``file_path``, reading from disk only on a miss.

        Args:
            file_path: File to read. A ``pathlib.Path`` is expected; a plain
                string (or other path-like object) is accepted as well.

        Returns:
            The UTF-8 decoded content, or None when the path cannot be
            resolved, the file does not exist, or reading/decoding fails.
            Failures are counted in the ``errors`` statistic and are not
            cached, so a later call retries the read.
        """
        try:
            path = Path(file_path)
            cache_key = str(path.resolve())
        except (OSError, RuntimeError, ValueError) as e:
            # resolve() can fail (e.g. symlink loops, invalid characters);
            # honour the "None on failure" contract instead of propagating.
            return self._record_failure(file_path, e)

        with self._lock:
            cached = self._cache.get(cache_key)
            if cached is not None:
                self._stats['hits'] += 1
                self._log_stats_periodically()
                return cached

            try:
                content = path.read_text(encoding='utf-8')
            except (FileNotFoundError, NotADirectoryError):
                # A missing file is an expected outcome: count it, stay quiet.
                self._stats['errors'] += 1
                return None
            except Exception as e:
                # Deliberately broad: reading is best-effort and any failure
                # (permissions, bad encoding, ...) is logged and reported as None.
                return self._record_failure(file_path, e)

            self._cache[cache_key] = content
            self._stats['misses'] += 1
            self._log_stats_periodically()
            return content

    def _record_failure(self, file_path, error) -> None:
        """Count a failed read, log a warning for it, and return None."""
        with self._lock:
            self._stats['errors'] += 1
        self.logger.warning(f"⚠️ 读取文件失败 {file_path}: {error}")
        return None

    def _log_stats_periodically(self) -> None:
        """Log a hit-rate summary every ``_STATS_LOG_INTERVAL`` requests.

        Must be called with ``self._lock`` held, right after a hit or a miss
        has been counted.
        """
        total_requests = self._stats['hits'] + self._stats['misses']
        if total_requests > 0 and total_requests % self._STATS_LOG_INTERVAL == 0:
            hit_rate = self._stats['hits'] / total_requests * 100
            self.logger.debug(f"📊 缓存统计: {total_requests} 次请求, 命中率 {hit_rate:.1f}%")

    def preload_files(self, file_paths: list[Path]) -> int:
        """
        Read every file in ``file_paths`` into the cache.

        Args:
            file_paths: Paths to load; any iterable of paths is accepted.

        Returns:
            Number of files whose content is available after the call.
        """
        # Materialise once so generators work and len() is available.
        paths = list(file_paths)
        loaded_count = sum(1 for path in paths if self.get_content(path) is not None)

        self.logger.info(f"📁 预加载完成: {loaded_count}/{len(paths)} 个文件")
        return loaded_count

    def clear_cache(self):
        """Drop all cached contents. The statistics counters are kept."""
        with self._lock:
            self._cache.clear()
            self.logger.info("🗑️ 缓存已清空")

    def get_stats(self) -> dict:
        """
        Return a snapshot of the cache statistics.

        Returns:
            Dict with ``total_requests`` (hits + misses), ``cache_hits``,
            ``cache_misses``, ``hit_rate_percent`` (0 when nothing was
            requested yet), ``errors`` and ``cached_files``.
        """
        with self._lock:
            hits = self._stats['hits']
            misses = self._stats['misses']
            total_requests = hits + misses
            hit_rate = (hits / total_requests * 100) if total_requests > 0 else 0

            return {
                'total_requests': total_requests,
                'cache_hits': hits,
                'cache_misses': misses,
                'hit_rate_percent': hit_rate,
                'errors': self._stats['errors'],
                'cached_files': len(self._cache)
            }

    def print_stats(self):
        """Write the current statistics to the logger at info level."""
        stats = self.get_stats()

        self.logger.info("📊 文件缓存统计:")
        self.logger.info(f" 总请求数: {stats['total_requests']}")
        self.logger.info(f" 缓存命中: {stats['cache_hits']}")
        self.logger.info(f" 缓存未命中: {stats['cache_misses']}")
        self.logger.info(f" 命中率: {stats['hit_rate_percent']:.1f}%")
        self.logger.info(f" 错误数: {stats['errors']}")
        self.logger.info(f" 已缓存文件: {stats['cached_files']}")

        # Every hit is one disk read that did not have to happen.
        if stats['cache_hits'] > 0:
            io_saved = stats['cache_hits']
            self.logger.info(f" 🚀 节省I/O操作: {io_saved} 次")
137
+
138
+
139
# Process-wide cache instance, created lazily by get_file_cache().
_global_cache = None
# Serialises creation of the shared instance.
_cache_lock = threading.Lock()


def get_file_cache() -> FileContentCache:
    """
    Return the shared FileContentCache, creating it on first use.

    Returns:
        The single process-wide FileContentCache instance.
    """
    global _global_cache

    # Fast path: the instance already exists, no locking required.
    if _global_cache is not None:
        return _global_cache

    with _cache_lock:
        # Re-check under the lock: another thread may have created it
        # between the test above and acquiring the lock.
        if _global_cache is None:
            _global_cache = FileContentCache()
        return _global_cache
159
+
160
+
161
def clear_global_cache():
    """Empty the shared cache if one has been created; otherwise do nothing."""
    if _global_cache is None:
        return
    _global_cache.clear_cache()
utils/type_index.py ADDED
@@ -0,0 +1,341 @@
1
+ """
2
+ 类型匹配索引系统
3
+
4
+ 用于快速查找Java类型,避免重复的目录扫描操作
5
+ 这是解决reproto性能瓶颈的第二个核心优化组件
6
+
7
+ Author: AI Assistant
8
+ """
9
+
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional, Set
12
+ import threading
13
+ from utils.logger import get_logger
14
+
15
+
16
class TypeMatchingIndex:
    """
    Lookup index from Java type names to fully qualified class names.

    The index is derived from the ``*.java`` files below ``sources_dir``:
    the file stem is taken as the class name and the directory path relative
    to ``sources_dir`` as the package. Lookups are attempted in this order:

    - exact match on the fully qualified name
    - match on the simple class name
    - suffix match (e.g. ``IdData`` -> ``ContactIdData``)

    When several classes qualify, the one whose package shares the longest
    leading part with the caller's package wins.
    """

    # Suffix lengths (in characters) under which class names are bucketed.
    _SUFFIX_LENGTHS = (4, 6, 8)

    def __init__(self, sources_dir: Path):
        """
        Set up an empty, not yet built index.

        Args:
            sources_dir: Root directory of the Java sources.
        """
        self.sources_dir = sources_dir
        self.logger = get_logger("type_index")

        self._exact_index: Dict[str, str] = {}  # full name -> full name
        self._simple_index: Dict[str, List[str]] = {}  # simple name -> full names
        self._suffix_index: Dict[str, List[str]] = {}  # name suffix -> full names
        self._package_index: Dict[str, List[str]] = {}  # package -> full names

        # Names rejected immediately: they never refer to a project class.
        self._basic_types: Set[str] = {
            'int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char',
            'String', 'Object', 'Integer', 'Long', 'Float', 'Double', 'Boolean',
            'Byte', 'Short', 'Character', 'List', 'Map', 'Set', 'Collection'
        }

        self._stats = {
            'total_classes': 0,
            'index_hits': 0,
            'index_misses': 0,
            'basic_type_skips': 0
        }

        self._lock = threading.RLock()
        self._is_built = False

    def build_index(self) -> None:
        """Scan ``sources_dir`` and populate all indexes; runs at most once."""
        if self._is_built:
            return

        with self._lock:
            # Another thread may have finished the build while we waited.
            if self._is_built:
                return

            self.logger.info("🏗️ 开始构建类型索引...")

            java_files = list(self.sources_dir.rglob("*.java"))
            self.logger.info(f"📁 发现 {len(java_files)} 个Java文件")

            for java_file in java_files:
                self._index_single_file(java_file)

            self._stats['total_classes'] = len(self._exact_index)
            self._is_built = True

            self.logger.info(f"✅ 索引构建完成: {self._stats['total_classes']} 个类")
            self._print_index_stats()

    def _index_single_file(self, java_file: Path) -> None:
        """
        Add one Java file to every index.

        Args:
            java_file: Path of the file; it must lie below ``sources_dir``,
                otherwise a warning is logged and the file is skipped.
        """
        try:
            class_name = java_file.stem
            # Directory components relative to the source root form the package.
            package_parts = java_file.relative_to(self.sources_dir).parts[:-1]
            package_name = '.'.join(package_parts)
            full_class_name = f"{package_name}.{class_name}" if package_name else class_name

            # 1. Exact index: fully qualified name.
            self._exact_index[full_class_name] = full_class_name

            # 2. Simple-name index.
            self._simple_index.setdefault(class_name, []).append(full_class_name)

            # 3. Suffix index (supports lookups such as IdData -> ContactIdData).
            if len(class_name) > 4:
                for suffix_len in self._SUFFIX_LENGTHS:
                    if len(class_name) >= suffix_len:
                        suffix = class_name[-suffix_len:]
                        self._suffix_index.setdefault(suffix, []).append(full_class_name)

            # 4. Package index.
            if package_name:
                self._package_index.setdefault(package_name, []).append(full_class_name)

        except Exception as e:
            # Best effort: one unindexable file must not abort the whole build.
            self.logger.warning(f"⚠️ 索引文件失败 {java_file}: {e}")

    def find_best_match(self, type_name: str, current_package: str = "") -> Optional[str]:
        """
        Find the fully qualified class name that best matches ``type_name``.

        Args:
            type_name: Type name to look up (fully qualified, simple, or a
                trailing part of a class name).
            current_package: Package of the referencing code; used only to
                choose between several candidates.

        Returns:
            The best matching fully qualified class name, or None when
            ``type_name`` is empty, a basic type, or nothing matches.
        """
        if not self._is_built:
            self.build_index()

        with self._lock:
            # All counters are updated under the lock so get_stats() is consistent.
            if not type_name:
                self._stats['index_misses'] += 1
                return None

            if type_name in self._basic_types:
                self._stats['basic_type_skips'] += 1
                return None

            match = self._lookup(type_name, current_package)
            if match is None:
                self._stats['index_misses'] += 1
            else:
                self._stats['index_hits'] += 1
            return match

    def _lookup(self, type_name: str, current_package: str) -> Optional[str]:
        """Run the exact, simple-name and suffix lookups; no statistics."""
        # 1. Exact match.
        exact = self._exact_index.get(type_name)
        if exact is not None:
            return exact

        # 2. Simple-name match; ties are broken by package similarity.
        same_name = self._simple_index.get(type_name)
        if same_name:
            return self._select_best_by_package(same_name, current_package)

        # 3. Suffix match. Any class ending with type_name sits in every
        # bucket whose length type_name reaches, so the first bucket probed
        # already decides the result. Probing the longest bucket first gives
        # the same candidates while scanning the smallest list.
        for suffix_len in reversed(self._SUFFIX_LENGTHS):
            if len(type_name) < suffix_len:
                continue
            bucket = self._suffix_index.get(type_name[-suffix_len:])
            if not bucket:
                continue
            # Keep only classes whose simple name really ends with type_name.
            candidates = [c for c in bucket if c.rpartition('.')[2].endswith(type_name)]
            if candidates:
                return self._select_best_by_package(candidates, current_package)

        # 4. Nothing matched.
        return None

    def _select_best_by_package(self, candidates: List[str], current_package: str) -> Optional[str]:
        """
        Choose the candidate whose package is most similar to ``current_package``.

        Args:
            candidates: Fully qualified class names to choose from.
            current_package: Package of the referencing code.

        Returns:
            None for an empty list; the first candidate when there is only
            one or no package is given; otherwise the candidate with the
            highest similarity (the earliest one on ties).
        """
        if not candidates:
            return None

        if len(candidates) == 1 or not current_package:
            return candidates[0]

        # max() returns the first maximal element, i.e. ties keep list order.
        return max(
            candidates,
            key=lambda name: self._calculate_package_similarity(
                name.rpartition('.')[0], current_package
            ),
        )

    def _calculate_package_similarity(self, package1: str, package2: str) -> float:
        """
        Score how much two package names have in common.

        Args:
            package1: First package name.
            package2: Second package name.

        Returns:
            Number of equal leading components divided by the depth of the
            deeper package (a value in 0-1); 0.0 if either name is empty.
        """
        if not package1 or not package2:
            return 0.0

        parts1 = package1.split('.')
        parts2 = package2.split('.')

        common_prefix = 0
        for left, right in zip(parts1, parts2):
            if left != right:
                break
            common_prefix += 1

        # Both lists are non-empty here, so the divisor is at least 1.
        return common_prefix / max(len(parts1), len(parts2))

    def get_classes_in_package(self, package_name: str) -> List[str]:
        """
        List the classes indexed directly under ``package_name``.

        Args:
            package_name: Dotted package name.

        Returns:
            A new list of fully qualified class names (empty when the
            package is unknown). Modifying it does not affect the index.
        """
        if not self._is_built:
            self.build_index()

        with self._lock:
            # Copy so callers cannot mutate the index's internal list.
            return list(self._package_index.get(package_name, ()))

    def get_stats(self) -> dict:
        """
        Return a snapshot of the lookup statistics.

        Returns:
            Dict with ``total_classes``, ``total_requests`` (hits + misses),
            ``index_hits``, ``index_misses``, ``hit_rate_percent`` (0 when
            nothing was looked up yet), ``basic_type_skips`` and ``is_built``.
        """
        with self._lock:
            hits = self._stats['index_hits']
            misses = self._stats['index_misses']
            total_requests = hits + misses
            hit_rate = (hits / total_requests * 100) if total_requests > 0 else 0

            return {
                'total_classes': self._stats['total_classes'],
                'total_requests': total_requests,
                'index_hits': hits,
                'index_misses': misses,
                'hit_rate_percent': hit_rate,
                'basic_type_skips': self._stats['basic_type_skips'],
                'is_built': self._is_built
            }

    def _print_index_stats(self):
        """Log the size of each index at info level."""
        self.logger.info("📊 索引统计:")
        self.logger.info(f" 精确索引: {len(self._exact_index)} 个类")
        self.logger.info(f" 简单名索引: {len(self._simple_index)} 个条目")
        self.logger.info(f" 后缀索引: {len(self._suffix_index)} 个条目")
        self.logger.info(f" 包名索引: {len(self._package_index)} 个包")

    def print_stats(self):
        """Log the lookup statistics at info level."""
        stats = self.get_stats()

        self.logger.info("📊 类型索引统计:")
        self.logger.info(f" 总类数: {stats['total_classes']}")
        self.logger.info(f" 查询请求: {stats['total_requests']}")
        self.logger.info(f" 索引命中: {stats['index_hits']}")
        self.logger.info(f" 索引未命中: {stats['index_misses']}")
        self.logger.info(f" 命中率: {stats['hit_rate_percent']:.1f}%")
        self.logger.info(f" 基础类型跳过: {stats['basic_type_skips']}")
308
+
309
+
310
# Process-wide index instance, created lazily by get_type_index().
_global_index = None
# Serialises creation of the shared index.
_index_lock = threading.Lock()


def get_type_index(sources_dir: Optional[Path] = None) -> TypeMatchingIndex:
    """
    Return the shared TypeMatchingIndex, creating and building it on first use.

    Args:
        sources_dir: Root directory of the Java sources. Required only when
            no shared index exists yet; ignored once one has been created.

    Returns:
        The process-wide TypeMatchingIndex instance.

    Raises:
        ValueError: If no shared index exists yet and ``sources_dir`` is None.
    """
    global _global_index

    # Work on a local reference: the global may be reset concurrently by
    # clear_global_index(), which must neither break the build call below
    # nor make this function return None.
    index = _global_index
    if index is None:
        with _index_lock:
            index = _global_index
            if index is None:
                if sources_dir is None:
                    raise ValueError("首次调用 get_type_index 时必须提供 sources_dir 参数")
                index = TypeMatchingIndex(sources_dir)
                index.build_index()
                # Publish only after a successful build so other callers
                # never observe a half-initialised singleton.
                _global_index = index

    return index
336
+
337
+
338
def clear_global_index():
    """
    Discard the shared type index.

    The next call to get_type_index() must supply ``sources_dir`` again and
    builds a fresh index.
    """
    global _global_index
    # Take the same lock as get_type_index() so the reset cannot interleave
    # with an index that is being created and built at this moment.
    with _index_lock:
        _global_index = None