code-finder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. claude_context/__init__.py +33 -0
  2. claude_context/agentic_integration.py +309 -0
  3. claude_context/ast_chunker.py +646 -0
  4. claude_context/config.py +239 -0
  5. claude_context/context_manager.py +627 -0
  6. claude_context/embeddings.py +307 -0
  7. claude_context/embeddings_interface.py +226 -0
  8. claude_context/enhanced_ast_chunker.py +1129 -0
  9. claude_context/explorer.py +951 -0
  10. claude_context/explorer_with_context.py +1008 -0
  11. claude_context/indexer.py +893 -0
  12. claude_context/markdown_chunker.py +421 -0
  13. claude_context/mode_handler.py +1774 -0
  14. claude_context/query_metrics.py +164 -0
  15. claude_context/question_generator.py +800 -0
  16. claude_context/readme_extractor.py +485 -0
  17. claude_context/repository_adapter.py +399 -0
  18. claude_context/search.py +493 -0
  19. claude_context/skills/__init__.py +11 -0
  20. claude_context/skills/_cli_common.py +74 -0
  21. claude_context/skills/_index_manager.py +98 -0
  22. claude_context/skills/api_surface.py +219 -0
  23. claude_context/skills/evidence_retrieval.py +151 -0
  24. claude_context/skills/grounded_review.py +212 -0
  25. claude_context/synthesis/__init__.py +8 -0
  26. claude_context/synthesis/editor_agent.py +391 -0
  27. claude_context/synthesis/llm_synthesizer.py +153 -0
  28. claude_context/synthesis/logic_explainer.py +235 -0
  29. claude_context/synthesis/multi_review_pipeline.py +717 -0
  30. claude_context/synthesis/prompt_builder.py +439 -0
  31. claude_context/synthesis/providers.py +115 -0
  32. claude_context/synthesis/validators.py +458 -0
  33. code_finder-0.1.0.dist-info/METADATA +823 -0
  34. code_finder-0.1.0.dist-info/RECORD +37 -0
  35. code_finder-0.1.0.dist-info/WHEEL +5 -0
  36. code_finder-0.1.0.dist-info/entry_points.txt +4 -0
  37. code_finder-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,646 @@
1
+ """
2
+ AST-based Code Chunker for Claude Context
3
+
4
+ Adapted from the original vibe2doc AST chunker to provide semantic code chunking
5
+ for the Claude Context MCP integration. Uses tree-sitter for accurate parsing
6
+ with a simple line-based fallback for edge cases.
7
+
8
+ Supports: Python, JavaScript, TypeScript, Go
9
+ """
10
+
11
+ import re
12
+ import logging
13
+ from typing import List, Dict, Any, Optional, Tuple
14
+ from dataclasses import dataclass, field
15
+ from pathlib import Path
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # Try to import tree-sitter dependencies
20
+ try:
21
+ import tree_sitter_python as tspython
22
+ import tree_sitter_javascript as tsjavascript
23
+ import tree_sitter_typescript as tstypescript
24
+ import tree_sitter_go as tsgo
25
+ from tree_sitter import Language, Parser, Node
26
+ HAS_TREE_SITTER = True
27
+ logger.info("Tree-sitter available for AST parsing")
28
+ except ImportError as e:
29
+ HAS_TREE_SITTER = False
30
+ Node = Any # Type hint fallback
31
+ logger.warning(f"Tree-sitter not available, using line-based fallback: {e}")
32
+
33
+
34
@dataclass
class CodeChunk:
    """A semantically complete piece of source code and its descriptive fields.

    ``size_chars`` is not a constructor argument: it is filled in right after
    construction with the number of non-whitespace characters in ``content``.
    """
    content: str
    # Kind label, e.g. 'function', 'class', 'method', 'import_block', 'module', 'block'
    chunk_type: str
    name: Optional[str] = None
    start_line: int = 0
    end_line: int = 0
    language: str = ""
    parent_context: Optional[str] = None
    # Extracted docstring text (set for functions/classes when available)
    docstring: Optional[str] = None
    size_chars: int = field(default=0, init=False)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Leave the value alone if something already set a non-zero size
        if self.size_chars != 0:
            return
        without_whitespace = re.sub(r'\s', '', self.content)
        self.size_chars = len(without_whitespace)

    def to_dict(self) -> Dict[str, Any]:
        """Return the chunk's fields as a plain dictionary for storage."""
        exported = (
            "content",
            "chunk_type",
            "name",
            "start_line",
            "end_line",
            "language",
            "parent_context",
            "docstring",
            "size_chars",
            "metadata",
        )
        return {key: getattr(self, key) for key in exported}
67
+
68
+
69
+ class ASTChunker:
70
+ """
71
+ AST-based code chunker for semantic code understanding.
72
+
73
+ Primary: Uses tree-sitter for accurate AST parsing
74
+ Fallback: Simple line-based chunking for edge cases
75
+ """
76
+
77
def __init__(self, max_chunk_size: int = 1500, chunk_overlap: int = 100):
    """
    Store the chunking limits and set up parsers when tree-sitter is importable.

    Args:
        max_chunk_size: Upper bound on lines per chunk (line-based fallback)
        chunk_overlap: Lines shared between consecutive fallback chunks
    """
    self.parsers = {}
    self.supported_languages = ['python', 'javascript', 'typescript', 'go']
    self.chunk_overlap = chunk_overlap
    self.max_chunk_size = max_chunk_size

    if not HAS_TREE_SITTER:
        logger.warning("Using line-based chunking fallback")
        return

    self._initialize_parsers()
94
+
95
def _initialize_parsers(self):
    """Create one tree-sitter parser per supported language.

    Each language is set up independently: a failure is logged as a warning
    and the remaining languages are still attempted.
    """
    # Grammar lookups are wrapped in lambdas so that an error raised while
    # resolving one grammar is caught inside that language's own try block.
    grammars = (
        ('python', 'Python', lambda: tspython.language()),
        ('javascript', 'JavaScript', lambda: tsjavascript.language()),
        # TypeScript uses the tsx variant of the grammar
        ('typescript', 'TypeScript', lambda: tstypescript.language_tsx()),
        ('go', 'Go', lambda: tsgo.language()),
    )

    for key, label, load_grammar in grammars:
        try:
            self.parsers[key] = Parser(Language(load_grammar()))
            logger.debug(f"{label} parser initialized")
        except Exception as e:
            logger.warning(f"Failed to initialize {label} parser: {e}")
128
+
129
def chunk_file(self, file_path: Path, content: Optional[str] = None) -> List[CodeChunk]:
    """
    Split one file into chunks, preferring AST parsing over line chunking.

    Args:
        file_path: Path to the file; its suffix selects the language
        content: File text if the caller already has it; read from disk otherwise

    Returns:
        List of CodeChunk objects (empty when the file cannot be read)
    """
    if content is None:
        try:
            content = file_path.read_text(encoding='utf-8', errors='ignore')
        except Exception as e:
            logger.error(f"Failed to read {file_path}: {e}")
            return []

    language = self._detect_language(file_path)
    path_text = str(file_path)

    ast_usable = HAS_TREE_SITTER and language in self.parsers
    if ast_usable:
        try:
            return self._chunk_with_ast(content, language, path_text)
        except Exception as e:
            # Any AST failure is logged and the line-based path takes over
            logger.warning(f"AST parsing failed for {file_path}, using fallback: {e}")

    return self._chunk_with_lines(content, language, path_text)
159
+
160
+ def _detect_language(self, file_path: Path) -> str:
161
+ """Detect programming language from file extension"""
162
+ ext_to_lang = {
163
+ '.py': 'python',
164
+ '.js': 'javascript',
165
+ '.jsx': 'javascript',
166
+ '.ts': 'typescript',
167
+ '.tsx': 'typescript',
168
+ '.go': 'go',
169
+ }
170
+ return ext_to_lang.get(file_path.suffix.lower(), 'unknown')
171
+
172
+ def _chunk_with_ast(self, content: str, language: str, file_path: str) -> List[CodeChunk]:
173
+ """
174
+ Chunk code using tree-sitter AST parsing.
175
+
176
+ This provides semantic chunking that respects:
177
+ - Module-level docstrings (extracted first for Python)
178
+ - Function boundaries
179
+ - Class definitions
180
+ - Method boundaries
181
+ - Import blocks
182
+ """
183
+ if language not in self.parsers:
184
+ raise ValueError(f"No parser available for {language}")
185
+
186
+ parser = self.parsers[language]
187
+ tree = parser.parse(bytes(content, "utf8"))
188
+
189
+ chunks = []
190
+
191
+ # Extract module-level docstring for Python files
192
+ if language == 'python':
193
+ module_docstring = self._extract_module_docstring(tree.root_node, content)
194
+ if module_docstring and len(module_docstring.strip()) > 20:
195
+ # Create a module chunk with the docstring
196
+ # Extract module name from file path
197
+ module_name = Path(file_path).stem
198
+ chunks.append(CodeChunk(
199
+ content=module_docstring,
200
+ chunk_type='module_docstring',
201
+ name=module_name,
202
+ start_line=1,
203
+ end_line=module_docstring.count('\n') + 1,
204
+ language=language,
205
+ parent_context=None,
206
+ docstring=module_docstring,
207
+ metadata={
208
+ "node_type": "module",
209
+ "has_error": False,
210
+ "has_docstring": True,
211
+ "is_module_docstring": True
212
+ }
213
+ ))
214
+
215
+ # Extract semantic units from the AST
216
+ chunks.extend(self._extract_chunks_from_node(
217
+ tree.root_node, content, language, file_path
218
+ ))
219
+
220
+ return chunks
221
+
222
+ def _extract_chunks_from_node(
223
+ self,
224
+ node: Node,
225
+ source: str,
226
+ language: str,
227
+ file_path: str,
228
+ parent_context: Optional[str] = None
229
+ ) -> List[CodeChunk]:
230
+ """Recursively extract chunks from AST nodes"""
231
+ chunks = []
232
+
233
+ # Determine if this node should be a chunk
234
+ if self._should_chunk_node(node, language):
235
+ chunk = self._create_chunk_from_node(node, source, language, parent_context)
236
+ if chunk:
237
+ chunks.append(chunk)
238
+ # Update parent context for children
239
+ if chunk.name:
240
+ parent_context = chunk.name
241
+
242
+ # Process children
243
+ for child in node.children:
244
+ chunks.extend(self._extract_chunks_from_node(
245
+ child, source, language, file_path, parent_context
246
+ ))
247
+
248
+ return chunks
249
+
250
+ def _should_chunk_node(self, node: Node, language: str) -> bool:
251
+ """Determine if a node should become its own chunk"""
252
+ chunk_node_types = {
253
+ 'python': [
254
+ 'function_definition', 'class_definition',
255
+ 'decorated_definition', 'import_from_statement',
256
+ 'import_statement'
257
+ ],
258
+ 'javascript': [
259
+ 'function_declaration', 'class_declaration',
260
+ 'method_definition', 'arrow_function',
261
+ 'function_expression', 'import_statement',
262
+ 'export_statement'
263
+ ],
264
+ 'typescript': [
265
+ 'function_declaration', 'class_declaration',
266
+ 'method_definition', 'arrow_function',
267
+ 'interface_declaration', 'type_alias_declaration',
268
+ 'import_statement', 'export_statement'
269
+ ],
270
+ 'go': [
271
+ 'function_declaration', 'method_declaration',
272
+ 'type_declaration', 'interface_declaration',
273
+ 'import_declaration'
274
+ ]
275
+ }
276
+
277
+ node_types = chunk_node_types.get(language, [])
278
+ return node.type in node_types
279
+
280
+ def _create_chunk_from_node(
281
+ self,
282
+ node: Node,
283
+ source: str,
284
+ language: str,
285
+ parent_context: Optional[str] = None
286
+ ) -> Optional[CodeChunk]:
287
+ """Create a CodeChunk from an AST node"""
288
+ # Extract content
289
+ start_byte = node.start_byte
290
+ end_byte = node.end_byte
291
+ content = source[start_byte:end_byte]
292
+
293
+ if not content.strip():
294
+ return None
295
+
296
+ # Determine chunk type and name
297
+ chunk_type, name = self._analyze_node(node, language)
298
+
299
+ # Extract docstring for Python functions and classes
300
+ docstring = None
301
+ if language == 'python' and node.type in ['function_definition', 'class_definition']:
302
+ docstring = self._extract_python_docstring(node, source)
303
+
304
+ return CodeChunk(
305
+ content=content,
306
+ chunk_type=chunk_type,
307
+ name=name,
308
+ start_line=node.start_point[0] + 1,
309
+ end_line=node.end_point[0] + 1,
310
+ language=language,
311
+ parent_context=parent_context,
312
+ docstring=docstring,
313
+ metadata={
314
+ "node_type": node.type,
315
+ "has_error": node.has_error,
316
+ "has_docstring": docstring is not None
317
+ }
318
+ )
319
+
320
+ def _analyze_node(self, node: Node, language: str) -> Tuple[str, Optional[str]]:
321
+ """Analyze node to determine chunk type and extract name"""
322
+ node_type = node.type
323
+ name = None
324
+
325
+ # Language-specific analysis
326
+ if language == 'python':
327
+ if node_type == 'function_definition':
328
+ chunk_type = 'function'
329
+ name = self._extract_python_function_name(node)
330
+ elif node_type == 'class_definition':
331
+ chunk_type = 'class'
332
+ name = self._extract_python_class_name(node)
333
+ elif node_type in ['import_statement', 'import_from_statement']:
334
+ chunk_type = 'import_block'
335
+ else:
336
+ chunk_type = 'module'
337
+
338
+ elif language in ['javascript', 'typescript']:
339
+ if node_type in ['function_declaration', 'function_expression']:
340
+ chunk_type = 'function'
341
+ name = self._extract_js_function_name(node)
342
+ elif node_type == 'class_declaration':
343
+ chunk_type = 'class'
344
+ name = self._extract_js_class_name(node)
345
+ elif node_type == 'arrow_function':
346
+ chunk_type = 'function'
347
+ name = 'arrow_function'
348
+ elif node_type == 'method_definition':
349
+ chunk_type = 'method'
350
+ name = self._extract_js_method_name(node)
351
+ elif node_type in ['import_statement', 'export_statement']:
352
+ chunk_type = 'import_block'
353
+ else:
354
+ chunk_type = 'module'
355
+
356
+ elif language == 'go':
357
+ if node_type == 'function_declaration':
358
+ chunk_type = 'function'
359
+ name = self._extract_go_function_name(node)
360
+ elif node_type == 'method_declaration':
361
+ chunk_type = 'method'
362
+ name = self._extract_go_method_name(node)
363
+ elif node_type in ['type_declaration', 'interface_declaration']:
364
+ chunk_type = 'type'
365
+ name = self._extract_go_type_name(node)
366
+ elif node_type == 'import_declaration':
367
+ chunk_type = 'import_block'
368
+ else:
369
+ chunk_type = 'module'
370
+ else:
371
+ chunk_type = 'unknown'
372
+
373
+ return chunk_type, name
374
+
375
+ def _extract_python_function_name(self, node: Node) -> Optional[str]:
376
+ """Extract function name from Python AST node"""
377
+ for child in node.children:
378
+ if child.type == 'identifier':
379
+ return child.text.decode('utf-8')
380
+ return None
381
+
382
+ def _extract_python_class_name(self, node: Node) -> Optional[str]:
383
+ """Extract class name from Python AST node"""
384
+ for child in node.children:
385
+ if child.type == 'identifier':
386
+ return child.text.decode('utf-8')
387
+ return None
388
+
389
+ def _extract_js_function_name(self, node: Node) -> Optional[str]:
390
+ """Extract function name from JavaScript/TypeScript AST node"""
391
+ for child in node.children:
392
+ if child.type == 'identifier':
393
+ return child.text.decode('utf-8')
394
+ return None
395
+
396
+ def _extract_js_class_name(self, node: Node) -> Optional[str]:
397
+ """Extract class name from JavaScript/TypeScript AST node"""
398
+ for child in node.children:
399
+ if child.type == 'identifier':
400
+ return child.text.decode('utf-8')
401
+ return None
402
+
403
+ def _extract_js_method_name(self, node: Node) -> Optional[str]:
404
+ """Extract method name from JavaScript/TypeScript AST node"""
405
+ for child in node.children:
406
+ if child.type == 'property_identifier':
407
+ return child.text.decode('utf-8')
408
+ return None
409
+
410
+ def _extract_go_function_name(self, node: Node) -> Optional[str]:
411
+ """Extract function name from Go AST node"""
412
+ for child in node.children:
413
+ if child.type == 'identifier':
414
+ return child.text.decode('utf-8')
415
+ return None
416
+
417
+ def _extract_go_method_name(self, node: Node) -> Optional[str]:
418
+ """Extract method name from Go AST node"""
419
+ for child in node.children:
420
+ if child.type == 'field_identifier':
421
+ return child.text.decode('utf-8')
422
+ return None
423
+
424
+ def _extract_go_type_name(self, node: Node) -> Optional[str]:
425
+ """Extract type name from Go AST node"""
426
+ spec_list = None
427
+ for child in node.children:
428
+ if child.type == 'type_spec':
429
+ spec_list = child
430
+ break
431
+
432
+ if spec_list:
433
+ for child in spec_list.children:
434
+ if child.type == 'type_identifier':
435
+ return child.text.decode('utf-8')
436
+ return None
437
+
438
+ def _extract_python_docstring(self, node: Node, source: str) -> Optional[str]:
439
+ """
440
+ Extract docstring from a Python function/class node.
441
+
442
+ Python docstrings are the first expression_statement containing a string
443
+ inside the block of a function_definition or class_definition.
444
+ """
445
+ for child in node.children:
446
+ if child.type == 'block':
447
+ for block_child in child.children:
448
+ if block_child.type == 'expression_statement':
449
+ for expr_child in block_child.children:
450
+ if expr_child.type == 'string':
451
+ raw = source[expr_child.start_byte:expr_child.end_byte]
452
+ return self._clean_docstring(raw)
453
+ # First expression_statement wasn't a string, no docstring
454
+ return None
455
+ return None
456
+
457
+ def _extract_module_docstring(self, root_node: Node, source: str) -> Optional[str]:
458
+ """
459
+ Extract module-level docstring from the root node of a Python file.
460
+
461
+ Module docstrings are the first expression_statement containing a string
462
+ at the top level of the module (direct child of the module/root node).
463
+ These often contain:
464
+ - Overall module purpose and description
465
+ - Usage examples
466
+ - Algorithm explanations
467
+ - Paper references (arXiv, DOI, etc.)
468
+ """
469
+ for child in root_node.children:
470
+ if child.type == 'expression_statement':
471
+ for expr_child in child.children:
472
+ if expr_child.type == 'string':
473
+ raw = source[expr_child.start_byte:expr_child.end_byte]
474
+ return self._clean_docstring(raw)
475
+ # First expression_statement wasn't a string, no module docstring
476
+ return None
477
+ # Skip comments but stop at any other statement type
478
+ elif child.type not in ['comment']:
479
+ return None
480
+ return None
481
+
482
+ def _clean_docstring(self, raw: str) -> str:
483
+ """Clean docstring by removing quotes and normalizing whitespace."""
484
+ # Remove triple quotes (both styles)
485
+ if raw.startswith('"""') and raw.endswith('"""'):
486
+ raw = raw[3:-3]
487
+ elif raw.startswith("'''") and raw.endswith("'''"):
488
+ raw = raw[3:-3]
489
+ # Remove single quotes (less common but valid)
490
+ elif raw.startswith('"') and raw.endswith('"'):
491
+ raw = raw[1:-1]
492
+ elif raw.startswith("'") and raw.endswith("'"):
493
+ raw = raw[1:-1]
494
+ return raw.strip()
495
+
496
+ def _chunk_with_lines(self, content: str, language: str, file_path: str) -> List[CodeChunk]:
497
+ """
498
+ Simple line-based chunking fallback.
499
+
500
+ This is used when tree-sitter is unavailable or fails.
501
+ Creates overlapping chunks based on line count.
502
+ """
503
+ lines = content.split('\n')
504
+ chunks = []
505
+
506
+ # Simple heuristic: try to detect function/class boundaries
507
+ i = 0
508
+ while i < len(lines):
509
+ # Look for potential start markers
510
+ chunk_lines = []
511
+ chunk_start = i
512
+
513
+ # Determine chunk type based on content
514
+ first_line = lines[i].strip() if i < len(lines) else ""
515
+ chunk_type = self._detect_chunk_type_from_line(first_line, language)
516
+ name = self._extract_name_from_line(first_line, language)
517
+
518
+ # Collect lines for this chunk
519
+ indent_level = len(lines[i]) - len(lines[i].lstrip()) if i < len(lines) else 0
520
+
521
+ # Add lines until we hit size limit or dedent
522
+ while i < len(lines) and len(chunk_lines) < self.max_chunk_size:
523
+ line = lines[i]
524
+ current_indent = len(line) - len(line.lstrip())
525
+
526
+ # Check for end of logical block (dedent)
527
+ if chunk_lines and line.strip() and current_indent < indent_level:
528
+ # Skip if it's just a blank line
529
+ if i + 1 < len(lines):
530
+ next_indent = len(lines[i + 1]) - len(lines[i + 1].lstrip())
531
+ if next_indent >= indent_level:
532
+ chunk_lines.append(line)
533
+ i += 1
534
+ continue
535
+ break
536
+
537
+ chunk_lines.append(line)
538
+ i += 1
539
+
540
+ # Create chunk if we have content
541
+ if chunk_lines and any(line.strip() for line in chunk_lines):
542
+ chunks.append(CodeChunk(
543
+ content='\n'.join(chunk_lines),
544
+ chunk_type=chunk_type if chunk_type else 'block',
545
+ name=name,
546
+ start_line=chunk_start + 1,
547
+ end_line=chunk_start + len(chunk_lines),
548
+ language=language,
549
+ metadata={"chunking_method": "line-based"}
550
+ ))
551
+
552
+ # Move back for overlap if chunk is large
553
+ if len(chunk_lines) > self.chunk_overlap:
554
+ i -= self.chunk_overlap
555
+
556
+ # Ensure we make progress
557
+ if i <= chunk_start:
558
+ i = chunk_start + 1
559
+
560
+ return chunks
561
+
562
+ def _detect_chunk_type_from_line(self, line: str, language: str) -> Optional[str]:
563
+ """Simple heuristic to detect chunk type from a line"""
564
+ line = line.strip()
565
+
566
+ if language == 'python':
567
+ if line.startswith('def '):
568
+ return 'function'
569
+ elif line.startswith('class '):
570
+ return 'class'
571
+ elif line.startswith(('import ', 'from ')):
572
+ return 'import_block'
573
+
574
+ elif language in ['javascript', 'typescript']:
575
+ if 'function ' in line or 'const ' in line and '=>' in line:
576
+ return 'function'
577
+ elif line.startswith('class '):
578
+ return 'class'
579
+ elif line.startswith(('import ', 'export ')):
580
+ return 'import_block'
581
+
582
+ elif language == 'go':
583
+ if line.startswith('func '):
584
+ return 'function'
585
+ elif line.startswith('type '):
586
+ return 'type'
587
+ elif line.startswith('import'):
588
+ return 'import_block'
589
+
590
+ return None
591
+
592
+ def _extract_name_from_line(self, line: str, language: str) -> Optional[str]:
593
+ """Simple heuristic to extract name from a line"""
594
+ line = line.strip()
595
+
596
+ if language == 'python':
597
+ if line.startswith('def '):
598
+ match = re.match(r'def\s+(\w+)', line)
599
+ return match.group(1) if match else None
600
+ elif line.startswith('class '):
601
+ match = re.match(r'class\s+(\w+)', line)
602
+ return match.group(1) if match else None
603
+
604
+ elif language in ['javascript', 'typescript']:
605
+ if 'function ' in line:
606
+ match = re.search(r'function\s+(\w+)', line)
607
+ return match.group(1) if match else None
608
+ elif line.startswith('class '):
609
+ match = re.match(r'class\s+(\w+)', line)
610
+ return match.group(1) if match else None
611
+ elif 'const ' in line and '=>' in line:
612
+ match = re.match(r'const\s+(\w+)', line)
613
+ return match.group(1) if match else None
614
+
615
+ elif language == 'go':
616
+ if line.startswith('func '):
617
+ match = re.match(r'func\s+(?:\([^)]+\)\s+)?(\w+)', line)
618
+ return match.group(1) if match else None
619
+ elif line.startswith('type '):
620
+ match = re.match(r'type\s+(\w+)', line)
621
+ return match.group(1) if match else None
622
+
623
+ return None
624
+
625
+
626
+ # Convenience function for easy import
627
def chunk_code(
    file_path: Path,
    content: Optional[str] = None,
    max_chunk_size: int = 1500,
    chunk_overlap: int = 100
) -> List[CodeChunk]:
    """
    Chunk a single file using a freshly constructed ASTChunker.

    Args:
        file_path: Path to the file
        content: File text if already loaded; otherwise it is read from disk
        max_chunk_size: Maximum lines per chunk (line-based fallback)
        chunk_overlap: Lines of overlap between chunks (line-based fallback)

    Returns:
        List of CodeChunk objects
    """
    return ASTChunker(
        max_chunk_size=max_chunk_size,
        chunk_overlap=chunk_overlap,
    ).chunk_file(file_path, content=content)