code-explore-by-sql 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,522 @@
1
+ """Code block summary strategies for the 'signature' view.
2
+
3
+ Reuses bracket_scanner for structural analysis and symbol_analyzer
4
+ helpers for declaration extraction. When DB child symbols are available,
5
+ skips text scanning and uses pre-classified symbol data directly.
6
+
7
+ All regex patterns are sourced from LanguageConfig and FrameworkConfig
8
+ rather than hardcoded.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from typing import Any
15
+
16
+ from .bracket_scanner import BracketBlock, compute_parent_map, scan_brackets
17
+ from .configs import FrameworkConfig, LanguageConfig
18
+ from .symbol_analyzer import _gather_declaration
19
+
20
+
21
def _is_comment_or_empty(s: str, lang: LanguageConfig) -> bool:
    """Tell whether an already-stripped line carries no code.

    A line counts as "no code" when it is empty, begins with the language's
    line-comment marker, or begins with the opening token of the language's
    block-comment pair. Markers that are unset (falsy) on ``lang`` are
    simply not checked.

    Args:
        s: The line, expected to be stripped by the caller.
        lang: Language config supplying ``line_comment`` and
            ``block_comment_pair``.

    Returns:
        ``True`` for empty/comment lines, ``False`` otherwise.
    """
    if not s:
        return True

    line_marker = lang.line_comment
    if line_marker and s.startswith(line_marker):
        return True

    # Only the opening token matters: a line that merely *continues* a
    # block comment is not recognised here.
    block_pair = lang.block_comment_pair
    return bool(block_pair and s.startswith(block_pair[0]))
30
+
31
+
32
def _format_control_flow(lang: LanguageConfig, kw: str, cond: str) -> str:
    """Render a one-line placeholder for a control-flow statement.

    Args:
        lang: Language config; only ``uses_braces`` is consulted.
        kw: The control-flow keyword to show (e.g. ``"if"``, ``"for"``).
        cond: The condition / header text to show after the keyword.

    Returns:
        ``" kw (cond) { ... }"`` for brace languages, otherwise
        ``" kw cond: ..."``.
    """
    if not lang.uses_braces:
        return f" {kw} {cond}: ..."
    return f" {kw} ({cond}) {{ ... }}"
37
+
38
+
39
+ # ── Dispatcher ───────────────────────────────────────────────────────────
40
+
41
# Block-type groups the dispatcher uses to map a symbol's ``block_type``
# onto a summary strategy ("class", "function", "macro", "declaration").
_CLASS_TYPES = frozenset(("class", "namespace"))
_FUNC_TYPES = frozenset(("method", "function"))
_MACRO_TYPES = frozenset(("macro_def",))
_DECL_TYPES = frozenset(("enum", "delegate_def"))
45
+
46
+
47
# Notice appended as the last line of a truncated summary; ``{total}`` is
# filled with the number of lines the summary had before truncation.
_MAX_TRUNCATE_SUFFIX = "// ... truncated, {total} lines total"


def _truncate_lines(lines: list[str], max_lines: int) -> list[str]:
    """Cap ``lines`` at ``max_lines`` entries, ending with a truncation notice.

    Args:
        lines: Summary lines to cap.
        max_lines: Maximum number of lines to return. Zero or a negative
            value disables truncation.

    Returns:
        ``lines`` itself when no truncation is needed; otherwise a new list
        holding the first ``max_lines - 1`` lines followed by the notice, so
        the result is exactly ``max_lines`` long.
    """
    total = len(lines)
    if max_lines > 0 and total > max_lines:
        notice = _MAX_TRUNCATE_SUFFIX.format(total=total)
        return [*lines[: max_lines - 1], notice]
    return lines
55
+
56
+
57
def apply_view(
    code: str,
    view: str,
    *,
    block_type: str | None = None,
    qualified_name: str | None = None,
    child_symbols: list[dict[str, Any]] | None = None,
    lang: LanguageConfig | None = None,
    fw: FrameworkConfig | None = None,
    max_lines: int = 0,
) -> str:
    """Render ``code`` according to the requested view.

    Views:
        * ``"full"`` - the code unchanged.
        * ``"meta"`` - an empty string.
        * ``"signature"`` - a condensed summary produced by the class,
          function or macro strategy chosen by ``_pick_strategy``.
        * anything else - the code unchanged.

    Args:
        code: Source text of the block.
        view: View name, see above.
        block_type: Pre-classified block type, if known; forwarded to the
            strategy picker.
        qualified_name: Accepted for interface compatibility; not used by
            this function.
        child_symbols: Pre-classified child symbols, forwarded to the class
            strategy.
        lang: Language config. Defaults to ``make_cpp_language()``.
        fw: Framework config. Defaults to ``make_generic_framework()``.
        max_lines: When positive, the summary is capped at this many lines.

    Returns:
        The rendered text. Blank input, and blocks for which no summarising
        strategy applies, are returned unchanged (and never truncated).
    """
    if view == "meta":
        return ""
    if view != "signature":
        # "full" and any unrecognised view both hand back the raw code.
        return code
    if not (code and code.strip()):
        return code

    # Signature view needs concrete configs; fall back to the defaults.
    if lang is None:
        from .configs import make_cpp_language

        lang = make_cpp_language()
    if fw is None:
        from .configs import make_generic_framework

        fw = make_generic_framework()

    strategy = _pick_strategy(block_type, code, lang)
    if strategy == "class":
        summary = summarize_class_block(code, child_symbols=child_symbols, lang=lang, fw=fw)
    elif strategy == "function":
        summary = summarize_function_body(code, lang=lang)
    elif strategy == "macro":
        summary = summarize_macro_block(code, lang=lang, fw=fw)
    else:
        return code

    if max_lines <= 0:
        return summary
    return "\n".join(_truncate_lines(summary.split("\n"), max_lines))
102
+
103
+
104
def _pick_strategy(block_type: str | None, code: str, lang: LanguageConfig) -> str:
    """Choose the summary strategy for a block.

    A recognised ``block_type`` decides directly. Otherwise the first line
    of ``code`` that is neither empty nor a comment is inspected:

    * it matches the language's block keyword pattern (or ``class`` /
      ``struct`` when the language defines none) -> ``"class"``;
    * it starts with the preprocessor prefix and contains ``define``
      -> ``"macro"``;
    * it contains both ``(`` and ``)`` -> ``"function"``.

    Returns:
        One of ``"class"``, ``"function"``, ``"macro"``, ``"declaration"``
        or ``"unknown"``.
    """
    if block_type:
        by_type = (
            (_CLASS_TYPES, "class"),
            (_FUNC_TYPES, "function"),
            (_MACRO_TYPES, "macro"),
            (_DECL_TYPES, "declaration"),
        )
        for members, strategy in by_type:
            if block_type in members:
                return strategy

    keyword_re = lang.block_keyword_re or re.compile(r"\b(?:class|struct)\b")

    # Only the first significant line is ever examined.
    stripped_lines = (raw.strip() for raw in code.split("\n"))
    first = next(
        (text for text in stripped_lines if not _is_comment_or_empty(text, lang)),
        None,
    )
    if first is None:
        return "unknown"

    if keyword_re.search(first):
        return "class"
    if lang.preprocessor_prefix and first.startswith(lang.preprocessor_prefix) and "define" in first:
        return "macro"
    if "(" in first and ")" in first:
        return "function"
    return "unknown"
130
+
131
+
132
+ # ── Strategy 1: Class / struct / namespace ───────────────────────────────
133
+
134
def summarize_class_block(
    code: str,
    *,
    child_symbols: list[dict[str, Any]] | None = None,
    lang: LanguageConfig | None = None,
    fw: FrameworkConfig | None = None,
) -> str:
    """Summarise a class / struct / namespace block.

    When ``child_symbols`` is provided (even as an empty list) the summary
    is built from that pre-classified symbol data; otherwise the text is
    scanned structurally.

    Args:
        code: Source text of the block.
        child_symbols: Pre-classified child symbols, or ``None`` to fall
            back to text scanning.
        lang: Language config. Defaults to ``make_cpp_language()``.
        fw: Framework config. Defaults to ``make_generic_framework()``.

    Returns:
        The condensed class summary.
    """
    # Both helpers dereference ``lang`` / ``fw`` straight away, so the
    # advertised ``None`` defaults must be resolved here. Same defaults as
    # ``apply_view``.
    if lang is None:
        from .configs import make_cpp_language

        lang = make_cpp_language()
    if fw is None:
        from .configs import make_generic_framework

        fw = make_generic_framework()

    if child_symbols is not None:
        return _class_summary_from_symbols(code, child_symbols, lang, fw)
    return _class_summary_from_text(code, lang, fw)
144
+
145
+
146
def _class_summary_from_symbols(
    code: str,
    child_symbols: list[dict[str, Any]],
    lang: LanguageConfig,
    fw: FrameworkConfig,
) -> str:
    """Build a class summary from pre-classified child symbols.

    Each line of ``code`` is handled by the first rule that applies:

    1. Access specifiers and decoration macros are kept verbatim.
    2. A line on which a child symbol starts is replaced according to the
       child's ``block_type``: functions/methods become ``" <signature>;"``
       (falling back to the raw line when no signature can be derived),
       classes/namespaces become the raw line plus a placeholder comment
       and ``lang.block_close_suffix``, anything else keeps the raw line.
    3. Lines inside a child block are dropped.
    4. Remaining lines are kept only when they contain the statement
       terminator and are not comments.

    A trailing comment reports the original line count.

    Args:
        code: Source text of the class block.
        child_symbols: Dicts with at least ``start_line`` and ``block_type``;
            ``end_line``, ``signature`` and ``qualified_name`` are optional.
            ``start_line`` is compared against 1-based line numbers within
            ``code`` - presumably callers pass block-relative numbers; verify
            against the caller.
        lang: Language config.
        fw: Framework config.

    Returns:
        The summary text, lines joined with ``"\\n"``.
    """
    source_lines = code.split("\n")
    # Later symbols win when two share a start line.
    symbols_at: dict[int, dict] = {sym["start_line"]: sym for sym in child_symbols}

    access_names = lang.access_spec_names
    decoration_re = fw.decoration_macro_re

    summary: list[str] = []
    for lineno, raw in enumerate(source_lines, start=1):
        text = raw.strip()

        # NOTE(review): these checks run before the inside-child test, so
        # matching lines inside a collapsed child are kept as well.
        if text in access_names or (decoration_re and decoration_re.match(text)):
            summary.append(raw)
            continue

        if lineno in symbols_at:
            sym = symbols_at[lineno]
            if not sym:
                continue
            kind = sym["block_type"]
            if kind in _FUNC_TYPES:
                signature = sym.get("signature") or _declaration_from_code(source_lines, lineno - 1, lang)
                summary.append(f" {signature};" if signature else raw)
            elif kind in _CLASS_TYPES:
                # NOTE(review): _is_inside_child_block does not treat a
                # class's end line as "inside", so a terminated closer such
                # as "};" is kept later in addition to block_close_suffix.
                summary.append(raw)
                summary.append(f" {lang.summary_comment_prefix} ... {kind} {sym.get('qualified_name', '')} ...")
                summary.append(lang.block_close_suffix)
            else:
                summary.append(raw)
            continue

        if _is_inside_child_block(lineno, symbols_at):
            continue

        if (
            lang.statement_terminator
            and lang.statement_terminator in text
            and not _is_comment_or_empty(text, lang)
        ):
            summary.append(raw)

    summary.append(f"{lang.summary_comment_prefix} ... {len(source_lines)} lines total")
    return "\n".join(summary)
208
+
209
+
210
def _declaration_from_code(lines: list[str], line_idx_0: int, lang: LanguageConfig | None = None) -> str | None:
    """Extract the declaration text that precedes a block's opening brace.

    Starting at ``line_idx_0`` the function first walks back over directly
    preceding line-comment lines, then searches up to six lines forward from
    that point for a ``{``. The lines from the start point through the brace
    line are stripped and joined with single spaces; everything before the
    first ``{`` is returned.

    Args:
        lines: All lines of the enclosing code.
        line_idx_0: 0-based index of the line where the declaration starts.
        lang: Language config supplying ``line_comment``; ``"//"`` is
            assumed when no config is given.

    Returns:
        The declaration without its opening brace, the joined text unchanged
        when the brace is its very first character, or ``None`` when no
        brace is found inside the search window.
    """
    comment_marker = lang.line_comment if lang else "//"

    start = line_idx_0
    # An empty marker would match every line (``str.startswith("")`` is
    # always true) and drag ``start`` back to 0; ``None`` would raise. Skip
    # the walk-back for languages without a line-comment marker, mirroring
    # the guard in ``_is_comment_or_empty``.
    if comment_marker:
        while start > 0 and lines[start - 1].strip().startswith(comment_marker):
            start -= 1

    # NOTE(review): the comment lines walked over above are part of the
    # joined text and count against the six-line window - confirm intended.
    for j in range(start, min(start + 6, len(lines))):
        if "{" in lines[j]:
            decl = " ".join(ln.strip() for ln in lines[start:j + 1])
            brace_pos = decl.find("{")
            if brace_pos > 0:
                return decl[:brace_pos].rstrip()
            return decl
    return None
222
+
223
+
224
def _is_inside_child_block(
    line_1based: int,
    child_by_start: dict[int, dict],
) -> bool:
    """Tell whether a line lies within the body of a child symbol.

    Only symbols whose span ``(start, end]`` contains the line are
    considered (``end`` defaults to ``start`` when ``end_line`` is absent):

    * function/method symbols: the line is inside;
    * class/namespace symbols: the line is inside unless it is the
      symbol's end line - the answer is returned immediately either way;
    * any other block type: ignored, the search continues.

    Args:
        line_1based: 1-based line number to test.
        child_by_start: Child symbols keyed by their start line.

    Returns:
        ``True`` when the line should be treated as hidden inside a child.
    """
    for first, sym in child_by_start.items():
        last = sym.get("end_line", first)
        if not (first < line_1based <= last):
            continue
        kind = sym["block_type"]
        if kind in _FUNC_TYPES:
            return True
        if kind in _CLASS_TYPES:
            # Within (first, last], only the end line itself is excluded.
            return line_1based < last
    return False
236
+
237
+
238
def _class_summary_from_text(
    code: str,
    lang: LanguageConfig,
    fw: FrameworkConfig,
) -> str:
    """Build a class summary by scanning the text for bracket blocks.

    The outermost bracket block (lowest depth, first one on ties) is taken
    as the class body. Lines up to and including its opening line, and from
    its closing line onwards, are kept verbatim. Inside the body:

    * access specifiers and decoration macros are kept;
    * each direct child block is collapsed to ``" <declaration>;"`` on its
      opening line (or the raw line when no declaration can be gathered)
      and its remaining lines are dropped;
    * other lines are kept only when they contain the statement terminator
      and are not comments.

    A trailing comment reports the original line count. When the scanner
    finds no blocks at all, ``code`` is returned unchanged.

    Args:
        code: Source text of the class block.
        lang: Language config.
        fw: Framework config.

    Returns:
        The summary text, lines joined with ``"\\n"``.
    """
    blocks = scan_brackets(
        code,
        verbatim_string_prefix=lang.verbatim_string_prefix,
        raw_string_char=lang.raw_string_char,
    )
    if not blocks:
        return code

    parent_of = compute_parent_map(blocks)
    source_lines = code.split("\n")

    access_names = lang.access_spec_names
    decoration_re = fw.decoration_macro_re

    outer = min(blocks, key=lambda b: b.depth)
    outer_key = (outer.open_line, outer.depth)

    # Direct children only: blocks whose parent is the outermost block.
    direct_children: list[BracketBlock] = [
        blk
        for blk in blocks
        if blk is not outer and parent_of.get((blk.open_line, blk.depth)) == outer_key
    ]

    collapsed: set[int] = set()
    for blk in direct_children:
        collapsed.update(range(blk.open_line, blk.close_line + 1))

    summary: list[str] = []
    for lineno, raw in enumerate(source_lines, start=1):
        text = raw.strip()

        outside_body = lineno <= outer.open_line or lineno >= outer.close_line
        if outside_body or text in access_names or (decoration_re and decoration_re.match(text)):
            summary.append(raw)
            continue

        if lineno in collapsed:
            # One entry per child opening on this line; interior and closing
            # lines of a child emit nothing.
            for blk in direct_children:
                if blk.open_line != lineno:
                    continue
                gathered = _gather_declaration(source_lines, lineno - 1, lang, fw)
                if not gathered:
                    summary.append(raw)
                    continue
                decl = " ".join(gathered).rstrip()
                brace_at = decl.find("{")
                if brace_at > 0:
                    decl = decl[:brace_at].rstrip()
                summary.append(f" {decl};")
            continue

        if (
            lang.statement_terminator
            and lang.statement_terminator in text
            and not _is_comment_or_empty(text, lang)
        ):
            summary.append(raw)

    summary.append(f"{lang.summary_comment_prefix} ... {len(source_lines)} lines total")
    return "\n".join(summary)
316
+
317
+
318
+ # ── Strategy 2: Macro definitions ───────────────────────────────────────
319
+
320
def summarize_macro_block(
    code: str,
    lang: LanguageConfig,
    fw: FrameworkConfig,
) -> str:
    """Summarise a macro definition block.

    When the bracket scanner finds no blocks, the line-based
    ``_macro_fallback`` is used. Otherwise the outermost bracket block
    (lowest depth, first one on ties) is treated as the macro body: lines up
    to and including its opening line, and from its closing line onwards,
    are kept verbatim. Inside the body each direct child block is collapsed
    to ``" <declaration>;"`` on its opening line (or the raw line when no
    declaration can be gathered), and other lines are kept only when they
    contain the statement terminator and are not comments.

    A trailing comment reports the original line count.

    Args:
        code: Source text of the macro block.
        lang: Language config.
        fw: Framework config, forwarded to ``_gather_declaration``.

    Returns:
        The summary text, lines joined with ``"\\n"``.
    """
    blocks = scan_brackets(
        code,
        verbatim_string_prefix=lang.verbatim_string_prefix,
        raw_string_char=lang.raw_string_char,
    )
    if not blocks:
        return _macro_fallback(code, lang)

    parent_map = compute_parent_map(blocks)
    lines = code.split("\n")

    # ``blocks`` is known to be non-empty here, so no second emptiness
    # check (and no unreachable ``return code``) is needed.
    outermost = min(blocks, key=lambda b: b.depth)
    outer_key = (outermost.open_line, outermost.depth)

    # Direct children only: blocks whose parent is the outermost block.
    child_blocks: list[BracketBlock] = [
        blk
        for blk in blocks
        if blk is not outermost and parent_map.get((blk.open_line, blk.depth)) == outer_key
    ]

    child_lines: set[int] = set()
    for blk in child_blocks:
        child_lines.update(range(blk.open_line, blk.close_line + 1))

    kept: list[str] = []
    for i, line in enumerate(lines, start=1):
        stripped = line.strip()

        if i <= outermost.open_line or i >= outermost.close_line:
            kept.append(line)
            continue

        if i in child_lines:
            # Emit one entry per child opening on this line; interior and
            # closing lines of a child emit nothing.
            for blk in child_blocks:
                if blk.open_line == i:
                    decl_lines = _gather_declaration(lines, i - 1, lang, fw)
                    if decl_lines:
                        decl = " ".join(decl_lines).rstrip()
                        brace_pos = decl.find("{")
                        if brace_pos > 0:
                            decl = decl[:brace_pos].rstrip()
                        kept.append(f" {decl};")
                    else:
                        kept.append(line)
            continue

        if (
            lang.statement_terminator and lang.statement_terminator in stripped
        ) and not _is_comment_or_empty(stripped, lang):
            kept.append(line)

    kept.append(f"{lang.summary_comment_prefix} ... {len(lines)} lines total")
    return "\n".join(kept)
389
+
390
+
391
def _macro_fallback(code: str, lang: LanguageConfig) -> str:
    """Line-based macro summary for code without any bracket blocks.

    Keeps every line that is not empty or a comment and that contains the
    language's statement terminator, then appends a comment reporting the
    original line count.

    Args:
        code: Source text of the macro block.
        lang: Language config.

    Returns:
        The summary text, lines joined with ``"\\n"``.
    """
    source_lines = code.split("\n")
    stripped_pairs = ((raw, raw.strip()) for raw in source_lines)
    summary = [
        raw
        for raw, text in stripped_pairs
        if not _is_comment_or_empty(text, lang)
        and lang.statement_terminator
        and lang.statement_terminator in text
    ]
    summary.append(f"{lang.summary_comment_prefix} ... {len(source_lines)} lines total")
    return "\n".join(summary)
402
+
403
+
404
+ # ── Strategy 3: Function body ───────────────────────────────────────────
405
+
406
def summarize_function_body(
    code: str,
    lang: LanguageConfig,
) -> str:
    """Condense a function to its signature plus a sketch of its body.

    Everything up to and including the line holding the first opening brace
    is kept verbatim. Each following body line contributes at most one
    summary line, decided by the first rule that matches:

    1. local variable declaration -> ``" type name;"`` or
       ``" type name = init;"`` (initialiser capped at 40 characters);
    2. range-based ``for`` (when the language defines a pattern);
    3. a control-flow pattern from ``lang.control_flow_patterns``
       (condition capped at 60 characters);
    4. a ``return`` statement (value capped at 60 characters).

    Comments, blank lines and bare brace lines are skipped; unmatched lines
    are dropped. The last body line starting with ``}`` is appended,
    followed by a comment with the body line count.

    Args:
        code: Source text of the function.
        lang: Language config supplying the patterns.

    Returns:
        The summary, or ``code`` unchanged when no opening brace is found
        or the body after the brace line is empty / only ``}``.
    """
    all_lines = code.split("\n")

    brace_at = _find_first_open_brace(all_lines)
    if brace_at < 0:
        return code

    body = all_lines[brace_at + 1 :]
    if "\n".join(body).strip() in ("", "}"):
        return code

    # Build language-aware regexes from config.
    scope = re.escape(lang.scope_operator)
    declaration_re = re.compile(
        r"^\s*"
        rf"(?:(?:{lang.local_var_modifiers})\s+)?"
        rf"([A-Za-z_]\w*(?:\s*{scope}\s*\w+)*(?:\s*<[^>]*>)?(?:\s*[*&]+)?)\s+"
        r"([A-Za-z_]\w*)"
        r"\s*(?:=[^;]*|)\s*;"
    )
    range_for_re = lang.range_for_re
    # Config stores (label, regex); flip to (regex, label) for matching.
    flow_patterns = [(pattern, keyword) for keyword, pattern in lang.control_flow_patterns]
    return_re = lang.return_re or re.compile(r"(?!x)x")  # never matches

    def shorten(text: str, limit: int) -> str:
        """Cap ``text`` at ``limit`` characters, ending in an ellipsis."""
        if len(text) > limit:
            return text[: limit - 3] + "..."
        return text

    def describe(raw: str) -> str | None:
        """Return the summary line for one body line, or None to drop it."""
        text = raw.strip()
        if _is_comment_or_empty(text, lang) or text in ("{", "}", "{})", "};"):
            return None

        decl = declaration_re.match(text)
        if decl:
            var_type = decl.group(1).strip()
            var_name = decl.group(2)
            init = re.search(r"=\s*([^;]+);", text)
            if not init:
                return f" {var_type} {var_name};"
            init_val = shorten(init.group(1).strip(), 40)
            return f" {var_type} {var_name} = {init_val};"

        if range_for_re:
            loop = range_for_re.search(text)
            if loop:
                parts = loop.groups()
                if len(parts) >= 3:
                    desc = f"{parts[0]} {parts[1]} : {parts[2]}"
                else:
                    desc = " ".join(parts)
                return _format_control_flow(lang, "for", desc)

        for pattern, keyword in flow_patterns:
            # Matched against the raw (unstripped) line.
            hit = pattern.match(raw)
            if hit:
                cond = shorten(hit.group(1).strip(), 60)
                return _format_control_flow(lang, keyword, cond)

        returned = return_re.match(text)
        if returned:
            retval = shorten(returned.group(1).strip(), 60)
            return f" return {retval}{lang.statement_terminator}"

        return None

    summary: list[str] = all_lines[: brace_at + 1]
    for raw in body:
        entry = describe(raw)
        if entry is not None:
            summary.append(entry)

    closing = _find_closing_brace(body)
    if closing is not None:
        summary.append(closing)

    summary.append(f"{lang.summary_comment_prefix} ... {len(body)} lines in body")
    return "\n".join(summary)
502
+
503
+
504
def _find_first_open_brace(lines: list[str]) -> int:
    """Locate the line holding the first balanced opening brace.

    Characters are scanned in order. Any ``}`` seen before an opening brace
    is recorded, and each such stray closer cancels one later ``{``; the
    first ``{`` met with no stray closers pending wins. Braces inside string
    literals or comments are not treated specially.

    Args:
        lines: Lines to scan.

    Returns:
        0-based index of the line containing that brace, or ``-1`` when
        there is none.
    """
    unmatched_closers = 0
    for index, text in enumerate(lines):
        for ch in text:
            if ch == "}":
                unmatched_closers += 1
            elif ch == "{":
                if not unmatched_closers:
                    return index
                unmatched_closers -= 1
    return -1
515
+
516
+
517
def _find_closing_brace(lines: list[str]) -> str | None:
    """Return the last line whose stripped text starts with ``}``.

    Args:
        lines: Lines to search, in source order.

    Returns:
        The matching line exactly as given (indentation preserved), or
        ``None`` when no line starts with a closing brace.
    """
    from_the_end = reversed(lines)
    return next(
        (candidate for candidate in from_the_end if candidate.strip().startswith("}")),
        None,
    )