code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,411 @@
1
+ """Tests for Qwen3Embedder - API-based Qwen3 Embedding integration.
2
+
3
+ These tests verify the Qwen3Embedder class correctly:
4
+ 1. Loads API configuration from environment variables
5
+ 2. Makes HTTP requests to DashScope API
6
+ 3. Handles API responses and errors
7
+ 4. Implements batch processing with retry logic
8
+ 5. Handles edge cases gracefully
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import TYPE_CHECKING
14
+ from unittest.mock import MagicMock, patch
15
+
16
+ import pytest
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Sequence
20
+
21
+
22
class TestQwen3Embedder:
    """Test suite for Qwen3Embedder class (API mode).

    Tests that exercise the API patch ``requests.post`` so that they do not
    send a real HTTP request; responses are ``MagicMock`` objects carrying a
    ``status_code`` and, where needed, a ``json()`` payload.
    """

    @pytest.fixture
    def mock_env(self, monkeypatch):
        """Set the DashScope API key and base URL environment variables."""
        monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-test-key")
        monkeypatch.setenv("DASHSCOPE_BASE_URL", "https://test.api.com")

    @pytest.fixture
    def sample_api_response(self):
        """Return a successful API payload with a single 1536-value embedding."""
        return {
            "output": {
                "embeddings": [
                    {"embedding": [0.1] * 1536, "text_index": 0},
                ]
            },
            "usage": {"total_tokens": 10},
        }

    @pytest.fixture
    def sample_batch_response(self):
        """Return a successful API payload with three 1536-value embeddings."""
        return {
            "output": {
                "embeddings": [
                    {"embedding": [0.1] * 1536, "text_index": 0},
                    {"embedding": [0.2] * 1536, "text_index": 1},
                    {"embedding": [0.3] * 1536, "text_index": 2},
                ]
            },
            "usage": {"total_tokens": 30},
        }

    def test_embedder_initialization_with_env_var(self, mock_env):
        """Test that Qwen3Embedder loads API key from environment."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        embedder = Qwen3Embedder()

        assert embedder.api_key == "sk-test-key"
        assert embedder.base_url == "https://test.api.com"
        assert embedder.model == "text-embedding-v4"

    def test_embedder_initialization_with_api_key_param(self):
        """Test that Qwen3Embedder accepts API key as parameter."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        embedder = Qwen3Embedder(api_key="sk-param-key")

        assert embedder.api_key == "sk-param-key"

    def test_embedder_initialization_missing_api_key(self):
        """Test that Qwen3Embedder raises error without API key."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        # Empty the whole environment so no key can be picked up from it.
        with patch.dict("os.environ", {}, clear=True):
            with pytest.raises(ValueError, match="DashScope API key required"):
                Qwen3Embedder()

    def test_embed_code_makes_api_request(self, mock_env, sample_api_response):
        """Test embed_code makes HTTP POST request to API."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        with patch("requests.post") as mock_post:
            mock_response = MagicMock()
            mock_response.status_code = 200
            mock_response.json.return_value = sample_api_response
            mock_post.return_value = mock_response

            embedder = Qwen3Embedder()
            result = embedder.embed_code("def test(): pass")

            mock_post.assert_called_once()
            call_args = mock_post.call_args
            # call_args[0] holds the positional arguments; the first is the URL.
            assert "embeddings/text-embedding/text-embedding" in call_args[0][0]

            assert isinstance(result, list)
            assert len(result) == 1536

    def test_embed_code_with_instruction(self, mock_env, sample_api_response):
        """Test embed_code adds instruction when requested."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        with patch("requests.post") as mock_post:
            mock_response = MagicMock()
            mock_response.status_code = 200
            mock_response.json.return_value = sample_api_response
            mock_post.return_value = mock_response

            embedder = Qwen3Embedder()
            embedder.embed_code("test query", use_instruction=True)

            # call_args[1] holds the keyword arguments of the POST call.
            call_kwargs = mock_post.call_args[1]
            payload = call_kwargs["json"]
            text = payload["input"]["texts"][0]
            assert "Instruct:" in text
            assert "Query:" in text

    def test_embed_code_api_error(self, mock_env):
        """Test embed_code raises RuntimeError on a 401 response."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        with patch("requests.post") as mock_post:
            mock_response = MagicMock()
            mock_response.status_code = 401
            mock_response.text = "Unauthorized"
            # The error body is not JSON, so decoding it fails as well.
            mock_response.json.side_effect = Exception("No JSON")
            mock_post.return_value = mock_response

            embedder = Qwen3Embedder()

            with pytest.raises(RuntimeError, match="API request failed"):
                embedder.embed_code("test")

    def test_embed_batch_makes_single_request(self, mock_env, sample_batch_response):
        """Test embed_batch makes API request for multiple texts."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        with patch("requests.post") as mock_post:
            mock_response = MagicMock()
            mock_response.status_code = 200
            mock_response.json.return_value = sample_batch_response
            mock_post.return_value = mock_response

            # Three texts with batch_size=5 are expected to go out in one POST.
            embedder = Qwen3Embedder(batch_size=5)
            texts = ["code1", "code2", "code3"]
            results = embedder.embed_batch(texts)

            assert len(results) == 3
            assert all(len(r) == 1536 for r in results)
            mock_post.assert_called_once()

    def test_embed_batch_respects_batch_size(self, mock_env):
        """Test embed_batch splits large batches correctly."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        call_count = 0

        def mock_post(*args, **kwargs):
            """Count the call and answer with one embedding per sent text."""
            nonlocal call_count
            call_count += 1
            mock_response = MagicMock()
            mock_response.status_code = 200
            # Return embeddings for each text in the batch
            texts = kwargs["json"]["input"]["texts"]
            mock_response.json.return_value = {
                "output": {
                    "embeddings": [
                        {"embedding": [0.1] * 1536, "text_index": i}
                        for i in range(len(texts))
                    ]
                }
            }
            return mock_response

        with patch("requests.post", side_effect=mock_post):
            embedder = Qwen3Embedder(batch_size=2)
            texts = ["code1", "code2", "code3", "code4", "code5"]
            results = embedder.embed_batch(texts)

            assert len(results) == 5
            # Should make 3 calls: 2+2+1
            assert call_count == 3

    def test_embed_batch_empty_list(self, mock_env):
        """Test embed_batch handles empty list."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        embedder = Qwen3Embedder()
        results = embedder.embed_batch([])

        assert results == []

    def test_embed_batch_api_failure(self, mock_env):
        """Test embed_batch raises RuntimeError on a 500 response."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        with patch("requests.post") as mock_post:
            mock_response = MagicMock()
            mock_response.status_code = 500
            mock_response.text = "Internal Server Error"
            mock_response.json.side_effect = Exception("No JSON")
            mock_post.return_value = mock_response

            embedder = Qwen3Embedder()
            texts = ["code1", "code2", "code3"]

            with pytest.raises(RuntimeError):
                embedder.embed_batch(texts)

    def test_rate_limit_retry(self, mock_env, sample_api_response):
        """Test embed_code retries on rate limit (429)."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        call_count = 0

        def mock_post(*args, **kwargs):
            """Answer 429 on the first two calls and 200 from the third on."""
            nonlocal call_count
            call_count += 1
            mock_response = MagicMock()

            if call_count < 3:
                mock_response.status_code = 429
            else:
                mock_response.status_code = 200
                mock_response.json.return_value = sample_api_response

            return mock_response

        with patch("requests.post", side_effect=mock_post):
            with patch("time.sleep") as mock_sleep:  # Don't actually sleep
                embedder = Qwen3Embedder(max_retries=3)
                result = embedder.embed_code("test")

                assert len(result) == 1536
                assert call_count == 3
                mock_sleep.assert_called()  # Should have waited between retries

    def test_get_embedding_dimension(self, mock_env):
        """Test get_embedding_dimension returns correct value."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        embedder = Qwen3Embedder()
        dimension = embedder.get_embedding_dimension()

        assert dimension == 1536  # text-embedding-v4 dimension

    def test_health_check_success(self, mock_env, sample_api_response):
        """Test health_check returns True when API is accessible."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        with patch("requests.post") as mock_post:
            mock_response = MagicMock()
            mock_response.status_code = 200
            mock_response.json.return_value = sample_api_response
            mock_post.return_value = mock_response

            embedder = Qwen3Embedder()
            result = embedder.health_check()

            assert result is True

    def test_health_check_failure(self, mock_env):
        """Test health_check returns False when API fails."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        with patch("requests.post") as mock_post:
            mock_response = MagicMock()
            mock_response.status_code = 401
            mock_post.return_value = mock_response

            embedder = Qwen3Embedder()
            result = embedder.health_check()

            assert result is False

    def test_request_timeout_retry(self, mock_env, sample_api_response):
        """Test request timeout triggers retry."""
        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder
        from requests.exceptions import Timeout

        call_count = 0

        def mock_post(*args, **kwargs):
            """Raise Timeout on the first call and succeed afterwards."""
            nonlocal call_count
            call_count += 1
            if call_count < 2:
                raise Timeout("Connection timeout")

            mock_response = MagicMock()
            mock_response.status_code = 200
            mock_response.json.return_value = sample_api_response
            return mock_response

        # time.sleep is patched, as in test_rate_limit_retry, so that any
        # wait between the failed attempt and the retry costs no real time.
        with patch("requests.post", side_effect=mock_post), patch("time.sleep"):
            embedder = Qwen3Embedder(max_retries=3)
            result = embedder.embed_code("test")

            assert len(result) == 1536
            assert call_count == 2
304
+
305
+
306
class TestEmbedderConfiguration:
    """Checks on how Qwen3Embedder resolves its constructor configuration."""

    def test_default_model(self, monkeypatch):
        """With no model argument the embedder uses text-embedding-v4."""
        monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-test")

        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        assert Qwen3Embedder().model == "text-embedding-v4"

    def test_custom_model(self, monkeypatch):
        """An explicit model argument is stored unchanged."""
        monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-test")

        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        chosen_model = Qwen3Embedder(model="custom-model").model
        assert chosen_model == "custom-model"

    def test_batch_size_limit(self, monkeypatch):
        """A requested batch size of 100 ends up as 25."""
        monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-test")

        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        oversized = Qwen3Embedder(batch_size=100)
        # 25 is the cap the original test attributes to MAX_BATCH_SIZE.
        assert oversized.batch_size == 25

    def test_api_key_format_warning(self, monkeypatch):
        """A key without the usual format is still accepted as-is."""
        monkeypatch.setenv("DASHSCOPE_API_KEY", "invalid-key")

        from code_graph_builder.embeddings.qwen3_embedder import Qwen3Embedder

        # Construction must not raise; the original test notes that a warning
        # is expected to be logged, but that is not asserted here.
        lenient = Qwen3Embedder()
        assert lenient.api_key == "invalid-key"
345
+
346
+
347
class TestDummyEmbedder:
    """Checks that DummyEmbedder produces all-zero vectors of the set size."""

    def test_embed_code_returns_zero_vector(self):
        """A single embedding has the configured length and only zeros."""
        from code_graph_builder.embeddings.qwen3_embedder import DummyEmbedder

        vector = DummyEmbedder(dimension=1536).embed_code("test")

        assert len(vector) == 1536
        assert not any(component != 0.0 for component in vector)

    def test_embed_batch_returns_zero_vectors(self):
        """A batch yields one all-zero vector per input text."""
        from code_graph_builder.embeddings.qwen3_embedder import DummyEmbedder

        vectors = DummyEmbedder(dimension=768).embed_batch(["a", "b", "c"])

        assert len(vectors) == 3
        for vector in vectors:
            assert len(vector) == 768
            assert all(component == 0.0 for component in vector)
369
+
370
+
371
class TestCreateEmbedder:
    """Checks on the create_embedder factory function."""

    def test_create_embedder_with_dummy(self):
        """use_dummy=True yields a DummyEmbedder instance."""
        from code_graph_builder.embeddings.qwen3_embedder import (
            DummyEmbedder,
            create_embedder,
        )

        produced = create_embedder(use_dummy=True)

        assert isinstance(produced, DummyEmbedder)

    def test_create_embedder_with_api_key(self, monkeypatch):
        """With an API key in the environment the factory builds a Qwen3Embedder."""
        monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-factory")

        from code_graph_builder.embeddings.qwen3_embedder import (
            Qwen3Embedder,
            create_embedder,
        )

        produced = create_embedder()

        assert isinstance(produced, Qwen3Embedder)

    def test_create_embedder_passes_kwargs(self, monkeypatch):
        """Keyword arguments given to the factory show up on the embedder."""
        monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-test")

        from code_graph_builder.embeddings.qwen3_embedder import (
            Qwen3Embedder,
            create_embedder,
        )

        overrides = {"batch_size": 10, "max_retries": 5}
        produced = create_embedder(**overrides)

        assert isinstance(produced, Qwen3Embedder)
        assert produced.batch_size == 10
        assert produced.max_retries == 5