code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,440 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Protocol
5
+
6
# System prompt for repository question answering. It tells the model to
# reply in the language of the user's query (unless another language is
# explicitly requested) and to format the answer as raw Markdown without
# wrapping it in ```markdown fences.
RAG_SYSTEM_PROMPT = r"""
You are a code assistant which answers user questions on a Github Repo.
You will receive user query, relevant context, and past conversation history.

LANGUAGE DETECTION AND RESPONSE:
- Detect the language of the user's query
- Respond in the SAME language as the user's query
- IMPORTANT:If a specific language is requested in the prompt, prioritize that language over the query language

FORMAT YOUR RESPONSE USING MARKDOWN:
- Use proper markdown syntax for all formatting
- For code blocks, use triple backticks with language specification (```python, ```javascript, etc.)
- Use ## headings for major sections
- Use bullet points or numbered lists where appropriate
- Format tables using markdown table syntax when presenting structured data
- Use **bold** and *italic* for emphasis
- When referencing file paths, use `inline code` formatting

IMPORTANT FORMATTING RULES:
1. DO NOT include ```markdown fences at the beginning or end of your answer
2. Start your response directly with the content
3. The content will already be rendered as markdown, so just provide the raw markdown content

Think step by step and ensure your answer is well-structured and visually organized.
"""
31
+
32
# Full prompt layout: system section, optional conversation history,
# optional retrieved contexts, then the user's input. The history and
# context sections use Jinja-style `{% ... %}` / `{{ ... }}` tags.
# NOTE(review): `{system_prompt}` and `{output_format_str}` use single braces
# while every other placeholder uses double braces -- confirm how this
# template is rendered before relying on those two being substituted.
RAG_TEMPLATE = r"""<START_OF_SYS_PROMPT>
{system_prompt}
{output_format_str}
<END_OF_SYS_PROMPT>
{# OrderedDict of DialogTurn #}
{% if conversation_history %}
<START_OF_CONVERSATION_HISTORY>
{% for key, dialog_turn in conversation_history.items() %}
{{key}}.
User: {{dialog_turn.user_query.query_str}}
You: {{dialog_turn.assistant_response.response_str}}
{% endfor %}
<END_OF_CONVERSATION_HISTORY>
{% endif %}
{% if contexts %}
<START_OF_CONTEXT>
{% for context in contexts %}
{{loop.index}}.
File Path: {{context.meta_data.get('file_path', 'unknown')}}
Content: {{context.text}}
{% endfor %}
<END_OF_CONTEXT>
{% endif %}
<START_OF_USER_PROMPT>
{{input_str}}
<END_OF_USER_PROMPT>
"""
59
+
60
# System prompt for the first Deep Research turn: the model is asked to
# produce a "## Research Plan", give initial findings and finish with
# "## Next Steps", without concluding yet.
# Placeholders (single-brace): repo_type, repo_url, repo_name, language_name.
DEEP_RESEARCH_FIRST_ITERATION_PROMPT = """<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are conducting a multi-turn Deep Research process to thoroughly investigate the specific topic in the user's query.
Your goal is to provide detailed, focused information EXCLUSIVELY about this topic.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- This is the first iteration of a multi-turn research process focused EXCLUSIVELY on the user's query
- Start your response with "## Research Plan"
- Outline your approach to investigating this specific topic
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- Clearly state the specific topic you're researching to maintain focus throughout all iterations
- Identify the key aspects you'll need to research
- Provide initial findings based on the information available
- End with "## Next Steps" indicating what you'll investigate in the next iteration
- Do NOT provide a final conclusion yet - this is just the beginning of the research
- Do NOT include general repository information unless directly relevant to the query
- Focus EXCLUSIVELY on the specific topic being researched - do not drift to related topics
- Your research MUST directly address the original question
- NEVER respond with just "Continue the research" as an answer - always provide substantive research findings
- Remember that this topic will be maintained across all research iterations
</guidelines>

<style>
- Be concise but thorough
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
</style>"""
89
+
90
# System prompt for the last Deep Research turn: the model is asked to
# synthesize the earlier findings under a "## Final Conclusion" heading.
# Placeholders (single-brace): repo_type, repo_url, repo_name, language_name.
DEEP_RESEARCH_FINAL_ITERATION_PROMPT = """<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are in the final iteration of a Deep Research process focused EXCLUSIVELY on the latest user query.
Your goal is to synthesize all previous findings and provide a comprehensive conclusion that directly addresses this specific topic and ONLY this topic.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- This is the final iteration of the research process
- CAREFULLY review the entire conversation history to understand all previous findings
- Synthesize ALL findings from previous iterations into a comprehensive conclusion
- Start with "## Final Conclusion"
- Your conclusion MUST directly address the original question
- Stay STRICTLY focused on the specific topic - do not drift to related topics
- Include specific code references and implementation details related to the topic
- Highlight the most important discoveries and insights about this specific functionality
- Provide a complete and definitive answer to the original question
- Do NOT include general repository information unless directly relevant to the query
- Focus exclusively on the specific topic being researched
- NEVER respond with "Continue the research" as an answer - always provide a complete conclusion
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- Ensure your conclusion builds on and references key findings from previous iterations
</guidelines>

<style>
- Be concise but thorough
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
- Structure your response with clear headings
- End with actionable insights or recommendations when appropriate
</style>"""
121
+
122
# System prompt for the middle Deep Research turns: the model is asked to
# build on earlier iterations and open with "## Research Update <n>".
# Placeholders (single-brace): repo_type, repo_url, repo_name,
# research_iteration, language_name.
# The heading line uses a single-brace {research_iteration} like the rest of
# the prompt; doubled braces would be emitted literally by str.format and the
# model would be told to print "{research_iteration}" instead of the number.
DEEP_RESEARCH_INTERMEDIATE_ITERATION_PROMPT = """<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are currently in iteration {research_iteration} of a Deep Research process focused EXCLUSIVELY on the latest user query.
Your goal is to build upon previous research iterations and go deeper into this specific topic without deviating from it.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- CAREFULLY review the conversation history to understand what has been researched so far
- Your response MUST build on previous research iterations - do not repeat information already covered
- Identify gaps or areas that need further exploration related to this specific topic
- Focus on one specific aspect that needs deeper investigation in this iteration
- Start your response with "## Research Update {research_iteration}"
- Clearly explain what you're investigating in this iteration
- Provide new insights that weren't covered in previous iterations
- If this is iteration 3, prepare for a final conclusion in the next iteration
- Do NOT include general repository information unless directly relevant to the query
- Focus EXCLUSIVELY on the specific topic being researched - do not drift to related topics
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- NEVER respond with just "Continue the research" as an answer - always provide substantive research findings
- Your research MUST directly address the original question
- Maintain continuity with previous research iterations - this is a continuous investigation
</guidelines>

<style>
- Be concise but thorough
- Focus on providing new information, not repeating what's already been covered
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
</style>"""
152
+
153
# System prompt for plain (non-research) chat: the model is told to answer
# directly, with no preamble and no wrapping ```markdown fence, and is shown
# an example of the unwanted output shape.
# Placeholders (single-brace): repo_type, repo_url, repo_name, language_name.
SIMPLE_CHAT_SYSTEM_PROMPT = """<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You provide direct, concise, and accurate information about code repositories.
You NEVER start responses with markdown headers or code fences.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- Answer the user's question directly without ANY preamble or filler phrases
- DO NOT include any rationale, explanation, or extra comments.
- DO NOT start with preambles like "Okay, here's a breakdown" or "Here's an explanation"
- DO NOT start with markdown headers like "## Analysis of..." or any file path references
- DO NOT start with ```markdown code fences
- DO NOT end your response with ``` closing fences
- DO NOT start by repeating or acknowledging the question
- JUST START with the direct answer to the question

<example_of_what_not_to_do>
```markdown
## Analysis of `adalflow/adalflow/datasets/gsm8k.py`

This file contains...
```
</example_of_what_not_to_do>

- Format your response with proper markdown including headings, lists, and code blocks WITHIN your answer
- For code analysis, organize your response with clear sections
- Think step by step and structure your answer logically
- Start with the most relevant information that directly addresses the user's query
- Be precise and technical when discussing code
- Your response language should be in the same language as the user's query
</guidelines>

<style>
- Use concise, direct language
- Prioritize accuracy over verbosity
- When showing code, include line numbers and file paths when relevant
- Use markdown formatting to improve readability
</style>"""
192
+
193
+
194
class PromptTemplate(Protocol):
    """Structural type for anything that renders a prompt via ``format``.

    Any object exposing ``format(**kwargs) -> str`` satisfies it; plain
    ``str`` templates have a compatible ``format`` method.
    """

    def format(self, **kwargs: str) -> str:
        """Return the prompt text with the keyword values substituted."""
196
+
197
+
198
@dataclass
class CodeContext:
    """A source snippet plus optional metadata, renderable as prompt text.

    Attributes:
        source_code: Code placed inside the fenced "Source Code:" section.
        file_path: Rendered as "File: ..." when set.
        qualified_name: Rendered as "Entity: ..." when set.
        entity_type: Rendered as "Type: ..." when set.
        docstring: Rendered under "Documentation:" when set.
        callers: Names listed under "Called By:".
        callees: Names listed under "Calls:".
        related_classes: Names listed under "Related Classes:".
    """

    source_code: str
    file_path: str | None = None
    qualified_name: str | None = None
    entity_type: str | None = None
    docstring: str | None = None
    callers: list[str] | None = None
    callees: list[str] | None = None
    related_classes: list[str] | None = None

    # Maximum number of entries rendered per relationship list. Left
    # unannotated on purpose so the dataclass does not treat it as a field.
    _MAX_LISTED = 5

    def _format_list_section(self, title: str, names: list[str] | None) -> list[str]:
        """Return the lines for one relationship list, or [] when it is empty."""
        if not names:
            return []
        shown = names[: self._MAX_LISTED]
        return ["", title, *(f" - {name}" for name in shown)]

    def format_context(self) -> str:
        """Render this context as a plain-text block for inclusion in a prompt.

        Layout: optional "Entity/Type/File" header lines, optional
        documentation, the fenced source code, then up to five entries each
        of callers, callees and related classes. Empty or missing parts are
        omitted entirely.

        Returns:
            The rendered sections joined with newlines.
        """
        header = [
            f"{label}: {value}"
            for label, value in (
                ("Entity", self.qualified_name),
                ("Type", self.entity_type),
                ("File", self.file_path),
            )
            if value
        ]
        # A blank separator follows the header only when a header exists.
        lines = [*header, ""] if header else []

        if self.docstring:
            lines += [f"Documentation:\n{self.docstring}", ""]

        lines += ["Source Code:", "```", self.source_code, "```"]

        for title, names in (
            ("Called By:", self.callers),
            ("Calls:", self.callees),
            ("Related Classes:", self.related_classes),
        ):
            lines.extend(self._format_list_section(title, names))

        return "\n".join(lines)
250
+
251
+
252
class CodeAnalysisPrompts:
    """Prompt templates and formatters for single-purpose code analysis tasks.

    Each ``format_*`` method accepts either a ``CodeContext`` (rendered via
    ``format_context()``) or an already-rendered string, and substitutes it
    into the matching class-level template.
    """

    SYSTEM_PROMPT = RAG_SYSTEM_PROMPT

    EXPLAIN_CODE_TEMPLATE = """Please explain the following code in detail.

{context}

Provide:
1. A brief summary of what this code does
2. Detailed explanation of the logic and flow
3. Key components and their purposes
4. Any important patterns or design decisions
5. Usage examples if applicable

Format your response in markdown."""

    ANSWER_QUESTION_TEMPLATE = """Based on the following code context, please answer the question.

Context:
{context}

Question: {question}

Provide a clear, accurate answer based on the code provided. If the answer cannot be determined from the context, say so."""

    GENERATE_DOC_TEMPLATE = """Generate comprehensive documentation for the following code.

{context}

Include:
1. Overview and purpose
2. Parameters and return values (for functions)
3. Usage examples
4. Important notes or caveats
5. Related components

Format as markdown suitable for technical documentation."""

    ANALYZE_ARCHITECTURE_TEMPLATE = """Analyze the architecture and design patterns in the following code.

{context}

Provide:
1. Architectural overview
2. Design patterns used
3. Component relationships
4. Data flow analysis
5. Strengths and potential improvements

Format your response in markdown with clear sections."""

    SUMMARIZE_MODULE_TEMPLATE = """Provide a high-level summary of the following module or component.

{context}

Include:
1. Module purpose and responsibilities
2. Key classes and functions
3. Public API overview
4. Dependencies and integrations
5. Usage guidelines

Keep the summary concise but informative."""

    @staticmethod
    def _render_context(context: CodeContext | str) -> str:
        """Return prompt-ready text for a ``CodeContext`` or a raw string."""
        if isinstance(context, CodeContext):
            return context.format_context()
        return context

    def get_system_prompt(self) -> str:
        """Return the system prompt shared by all analysis tasks."""
        return self.SYSTEM_PROMPT

    def format_explain_prompt(self, context: CodeContext | str) -> str:
        """Build the "explain this code" user prompt for ``context``."""
        return self.EXPLAIN_CODE_TEMPLATE.format(context=self._render_context(context))

    def format_query_prompt(self, query: str, context: CodeContext | str) -> str:
        """Build a question-answering prompt.

        Args:
            query: The question, inserted after "Question: ".
            context: Code context the answer should be based on.
        """
        return self.ANSWER_QUESTION_TEMPLATE.format(
            context=self._render_context(context),
            question=query,
        )

    def format_documentation_prompt(self, context: CodeContext | str) -> str:
        """Build the documentation-generation prompt for ``context``."""
        return self.GENERATE_DOC_TEMPLATE.format(context=self._render_context(context))

    def format_architecture_prompt(self, context: CodeContext | str) -> str:
        """Build the architecture-analysis prompt for ``context``."""
        return self.ANALYZE_ARCHITECTURE_TEMPLATE.format(
            context=self._render_context(context)
        )

    def format_summary_prompt(self, context: CodeContext | str) -> str:
        """Build the module-summary prompt for ``context``."""
        return self.SUMMARIZE_MODULE_TEMPLATE.format(
            context=self._render_context(context)
        )

    def format_multi_context_prompt(self, query: str, contexts: list[CodeContext]) -> str:
        """Build a question prompt over several contexts.

        Each context is rendered under a "### Context N" heading (numbered
        from 1) and the sections are separated by blank lines.

        Args:
            query: The question, inserted after "Question: ".
            contexts: Contexts to render, in the order given.
        """
        full_context = "\n\n".join(
            f"### Context {position}\n{ctx.format_context()}"
            for position, ctx in enumerate(contexts, 1)
        )
        return (
            "Based on the following code contexts, please answer the question.\n\n"
            f"{full_context}\n\n"
            f"Question: {query}\n\n"
            "Synthesize information from all contexts to provide a comprehensive answer."
        )
372
+
373
+
374
class RAGPrompts:
    """Builds the (system prompt, user prompt) pair for a retrieval-augmented query."""

    RETRIEVAL_CONTEXT_HEADER = """The following code snippets are retrieved based on semantic similarity to your query. They are ordered by relevance.

---

"""

    NO_RESULTS_PROMPT = """No relevant code was found for your query. Please try:
- Using different keywords
- Being more specific about the functionality
- Checking if the code exists in the analyzed repository"""

    def __init__(self):
        # The analysis prompt set supplies the shared system prompt.
        self.analysis = CodeAnalysisPrompts()

    def format_rag_query(
        self,
        query: str,
        contexts: list[CodeContext],
        include_sources: bool = True,
    ) -> tuple[str, str]:
        """Assemble the prompts for answering ``query`` from retrieved code.

        Args:
            query: The user's question.
            contexts: Retrieved contexts, rendered in the order given as
                "## Result N" sections.
            include_sources: When true, a bold qualified name is added above
                each result that has one.

        Returns:
            ``(system_prompt, user_prompt)``. With no contexts, the user
            prompt is ``NO_RESULTS_PROMPT``.
        """
        system_prompt = self.analysis.get_system_prompt()
        if not contexts:
            return (system_prompt, self.NO_RESULTS_PROMPT)

        sections = [self.RETRIEVAL_CONTEXT_HEADER]
        for position, item in enumerate(contexts, start=1):
            entry = [f"## Result {position}"]
            if include_sources and item.qualified_name:
                entry.append(f"**{item.qualified_name}**")
            entry.append(item.format_context())
            entry.append("\n---\n")
            sections.extend(entry)

        context_str = "\n".join(sections)
        user_prompt = (
            f"{context_str}\n\n"
            "Based on the retrieved code above, please answer:\n\n"
            f"{query}\n\n"
            "Provide a comprehensive answer that synthesizes information from all relevant code snippets."
        )
        return (system_prompt, user_prompt)
421
+
422
+
423
def get_default_prompts() -> RAGPrompts:
    """Return a freshly constructed ``RAGPrompts`` with default settings."""
    prompts = RAGPrompts()
    return prompts
425
+
426
+
427
def create_code_context(
    source_code: str,
    file_path: str | None = None,
    qualified_name: str | None = None,
    entity_type: str | None = None,
    **kwargs: str | list[str] | None,
) -> CodeContext:
    """Construct a ``CodeContext`` from the common fields plus extra keywords.

    Args:
        source_code: The code snippet.
        file_path: Optional path of the file the snippet comes from.
        qualified_name: Optional qualified name of the entity.
        entity_type: Optional entity kind label.
        **kwargs: Forwarded unchanged to ``CodeContext``; names that are not
            ``CodeContext`` fields make the constructor raise ``TypeError``.

    Returns:
        The new ``CodeContext`` instance.
    """
    field_values = {
        "source_code": source_code,
        "file_path": file_path,
        "qualified_name": qualified_name,
        "entity_type": entity_type,
    }
    # kwargs can never collide with the four named parameters above, so this
    # merge is equivalent to passing everything as keywords in one call.
    field_values.update(kwargs)
    return CodeContext(**field_values)