kinetic-context 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. kce/__init__.py +5 -0
  2. kce/_version.py +1 -0
  3. kce/assembly/__init__.py +2 -0
  4. kce/assembly/context_builder.py +41 -0
  5. kce/benchmark/__init__.py +2 -0
  6. kce/benchmark/dataset.py +334 -0
  7. kce/benchmark/django_dataset.py +42 -0
  8. kce/benchmark/flask_dataset.py +42 -0
  9. kce/benchmark/metrics.py +26 -0
  10. kce/benchmark/runner.py +275 -0
  11. kce/cli.py +568 -0
  12. kce/config.py +120 -0
  13. kce/coordinator/__init__.py +2 -0
  14. kce/coordinator/intent.py +95 -0
  15. kce/coordinator/query_coordinator.py +59 -0
  16. kce/coordinator/query_expansion.py +68 -0
  17. kce/coordinator/query_transform.py +41 -0
  18. kce/coordinator/symbol_lookup.py +32 -0
  19. kce/embeddings/__init__.py +2 -0
  20. kce/embeddings/codestral.py +73 -0
  21. kce/engine.py +419 -0
  22. kce/graph/__init__.py +2 -0
  23. kce/graph/ckg.py +136 -0
  24. kce/incremental/__init__.py +2 -0
  25. kce/incremental/time_travel.py +145 -0
  26. kce/incremental/update.py +47 -0
  27. kce/ingestion/__init__.py +2 -0
  28. kce/ingestion/chunker.py +109 -0
  29. kce/ingestion/discovery.py +51 -0
  30. kce/ingestion/parser.py +121 -0
  31. kce/ingestion/summarizer.py +78 -0
  32. kce/mcp_server.py +321 -0
  33. kce/neuro_symbolic/__init__.py +2 -0
  34. kce/neuro_symbolic/loop.py +24 -0
  35. kce/ranking/__init__.py +2 -0
  36. kce/ranking/reranker.py +58 -0
  37. kce/ranking/unified_reranker.py +104 -0
  38. kce/retrieval/__init__.py +2 -0
  39. kce/retrieval/bm25.py +44 -0
  40. kce/retrieval/dense.py +40 -0
  41. kce/retrieval/graph_retrieval.py +70 -0
  42. kce/retrieval/novel_signals.py +221 -0
  43. kce/retrieval/rrf.py +18 -0
  44. kce/store/__init__.py +2 -0
  45. kce/store/index_store.py +57 -0
  46. kce/store/registry.py +256 -0
  47. kce/store/vector_store.py +141 -0
  48. kinetic_context-0.2.1.dist-info/METADATA +348 -0
  49. kinetic_context-0.2.1.dist-info/RECORD +53 -0
  50. kinetic_context-0.2.1.dist-info/WHEEL +5 -0
  51. kinetic_context-0.2.1.dist-info/entry_points.txt +3 -0
  52. kinetic_context-0.2.1.dist-info/licenses/LICENSE +21 -0
  53. kinetic_context-0.2.1.dist-info/top_level.txt +1 -0
kce/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """kinetic-context — repository-level code context engine."""
2
# Must match kce/_version.py and the distribution metadata (0.2.1); the
# previous literal "0.2.0" lagged one release behind both.
__version__ = "0.2.1"
3
+
4
+ from .engine import KCEEngine # noqa: F401
5
+ from .config import KCEConfig # noqa: F401
kce/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.2.1"
@@ -0,0 +1,2 @@
1
+ """Context assembly: budget-aware hierarchical context building."""
2
+ from __future__ import annotations
@@ -0,0 +1,41 @@
1
+ """Budget-aware hierarchical context assembly."""
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass, field
4
+ from ..config import KCEConfig
5
+ from ..ingestion.chunker import CodeChunk, estimate_tokens
6
+
7
@dataclass
class ContextPackage:
    """Result of context assembly: the rendered text plus its ingredients."""

    # Prompt framing text and the repository overview, stored as given.
    system_prompt: str
    repo_map: str
    # Paths of the files that contributed at least one selected chunk.
    relevant_files: list[str]
    # Chunks that were placed in the primary / supporting budget buckets.
    primary_chunks: list[CodeChunk]
    supporting_chunks: list[CodeChunk]
    # Fully rendered context and its estimated size in tokens.
    full_text: str
    token_count: int
    # Tokens consumed per bucket; defaults to a fresh empty dict.
    budget_used: dict = field(default_factory=dict)
12
+
13
class ContextAssembler:
    """Pack ranked code chunks into a prompt context under a token budget."""

    def __init__(self, config):
        # Only ``config.context_budget_tokens`` is read by this class.
        self.config = config

    def assemble(self, ranked_chunks, repo_map, file_summaries, system_prompt=""):
        """Build a :class:`ContextPackage` from ranked chunks.

        Chunks are de-duplicated, then greedily placed in rank order: each
        chunk goes to the primary bucket if it still fits there, otherwise
        to the supporting bucket if it fits there, otherwise it is dropped.

        Args:
            ranked_chunks: Chunks in descending relevance order.
            repo_map: Repository overview text, embedded verbatim.
            file_summaries: Mapping of file path to summary text; paths
                without an entry render as ``(no summary)``.
            system_prompt: Text wrapped in the leading ``<system_prompt>``.

        Returns:
            The assembled :class:`ContextPackage`.
        """
        total = self.config.context_budget_tokens
        shares = {"system": 0.05, "repo": 0.08, "files": 0.12, "primary": 0.60, "supporting": 0.15}
        # Every share is converted, but only "primary" and "supporting"
        # are consulted below.
        budgets = {name: int(total * share) for name, share in shares.items()}

        primary, supporting = [], []
        spent = {"primary": 0, "supporting": 0}
        for chunk in self._dedup(ranked_chunks):
            cost = estimate_tokens(chunk.content)
            if spent["primary"] + cost <= budgets["primary"]:
                bucket, label = primary, "primary"
            elif spent["supporting"] + cost <= budgets["supporting"]:
                bucket, label = supporting, "supporting"
            else:
                # Fits in neither bucket: skip it, later chunks may be smaller.
                continue
            bucket.append(chunk)
            spent[label] += cost

        paths = sorted({chunk.rel_path for chunk in primary + supporting})
        summary_blocks = []
        for path in paths:
            summary_blocks.append(f"## {path}\n{file_summaries.get(path,'(no summary)')}")
        rel_text = "\n".join(summary_blocks)

        parts = [f"<system_prompt>\n{system_prompt}\n</system_prompt>\n"]
        parts.extend(
            f'<code_chunk role="primary" file="{chunk.rel_path}" lines="{chunk.start_line}-{chunk.end_line}">\n{chunk.content}\n</code_chunk>'
            for chunk in primary
        )
        # Supporting chunks are rendered without a line range.
        parts.extend(
            f'<code_chunk role="supporting" file="{chunk.rel_path}">\n{chunk.content}\n</code_chunk>'
            for chunk in supporting
        )
        parts.append(f"<relevant_files>\n{rel_text}\n</relevant_files>\n<repo_map>\n{repo_map}\n</repo_map>")
        full_text = "\n\n".join(parts)

        return ContextPackage(
            system_prompt=system_prompt,
            repo_map=repo_map,
            relevant_files=paths,
            primary_chunks=primary,
            supporting_chunks=supporting,
            full_text=full_text,
            token_count=estimate_tokens(full_text),
            budget_used=spent,
        )

    def _dedup(self, chunks):
        """Return ``chunks`` in order, keeping the first of each duplicate.

        A chunk is a duplicate when its ``chunk_id`` was already kept, or
        when its ``(rel_path, start_byte, end_byte)`` span was already kept.
        """
        known_ids, known_spans = set(), set()
        unique = []
        for chunk in chunks:
            if chunk.chunk_id not in known_ids:
                span = (chunk.rel_path, chunk.start_byte, chunk.end_byte)
                if span not in known_spans:
                    known_ids.add(chunk.chunk_id)
                    known_spans.add(span)
                    unique.append(chunk)
        return unique
@@ -0,0 +1,2 @@
1
+ """Benchmark datasets and metrics."""
2
+ from __future__ import annotations
@@ -0,0 +1,334 @@
1
+ """Flask benchmark — 30 queries targeting real Flask source files.
2
+
3
+ The benchmark is split across the five :class:`QueryType` categories
4
+ to match the distribution KCE was tuned on:
5
+
6
+ * 6 IDENTIFIER_LOOKUP
7
+ * 11 SEMANTIC_QUESTION
8
+ * 5 ARCHITECTURE_QUERY
9
+ * 4 BUG_DIAGNOSIS
10
+ * 4 CODE_COMPLETION
11
+
12
+ Each query carries:
13
+
14
+ * ``query`` — natural-language question
15
+ * ``query_type`` — expected QueryType
16
+ * ``gold_files`` — list of Flask source files (POSIX rel paths)
17
+ that a correct answer must surface
18
+ * ``gold_symbols`` — optional list of symbol names that should
19
+ appear in the retrieved chunks (used for
20
+ finer-grained symbol-level evaluation)
21
+ * ``notes`` — free-text annotation
22
+
23
+ The gold files are paths inside the Flask repository
24
+ (``https://github.com/pallets/flask``); they are stable across the
25
+ 3.x line. Where multiple files are equally correct we list them all
26
+ and recall is computed as "fraction of gold found".
27
+ """
28
+ from __future__ import annotations
29
+
30
+ from dataclasses import dataclass, field
31
+ from typing import Optional
32
+
33
+ from ..coordinator.query_coordinator import QueryType
34
+
35
+
36
+ # --------------------------------------------------------------------------- #
37
+ # Container
38
+ # --------------------------------------------------------------------------- #
39
@dataclass
class BenchmarkQuery:
    """One benchmark query together with its gold annotations."""

    # Natural-language question (or bare identifier) posed to the engine.
    query: str
    # Category the query is expected to be classified as.
    query_type: QueryType
    # POSIX relative paths of source files a correct answer must surface.
    gold_files: list[str] = field(default_factory=list)
    # Optional symbol names that should appear in the retrieved chunks.
    gold_symbols: list[str] = field(default_factory=list)
    # Free-text annotation.
    notes: str = ""
    # Query identifier, e.g. "flask-id-01".
    id: str = ""
47
+
48
+
49
+ # --------------------------------------------------------------------------- #
50
+ # The 30 queries
51
+ # --------------------------------------------------------------------------- #
52
# Every entry in ``gold_files`` must be the path of a real source file:
# retrieved paths are compared by exact match, so a directory or a
# non-existent file can never be found and silently caps recall.
FLASK_BENCHMARK: list[BenchmarkQuery] = [
    # ----------------------------------------------------------------- #
    # IDENTIFIER_LOOKUP (6)
    # ----------------------------------------------------------------- #
    BenchmarkQuery(
        id="flask-id-01",
        query="app.config.from_object",
        query_type=QueryType.IDENTIFIER_LOOKUP,
        gold_files=["src/flask/config.py"],
        gold_symbols=["from_object"],
        notes="direct config loader",
    ),
    BenchmarkQuery(
        id="flask-id-02",
        query="Flask.run",
        query_type=QueryType.IDENTIFIER_LOOKUP,
        gold_files=["src/flask/app.py"],
        gold_symbols=["run"],
        notes="dev server entrypoint",
    ),
    BenchmarkQuery(
        id="flask-id-03",
        query="url_for",
        query_type=QueryType.IDENTIFIER_LOOKUP,
        gold_files=["src/flask/helpers.py", "src/flask/app.py"],
        gold_symbols=["url_for"],
        notes="URL builder helper",
    ),
    BenchmarkQuery(
        id="flask-id-04",
        query="request.json",
        query_type=QueryType.IDENTIFIER_LOOKUP,
        # Was the bare directory "src/flask/json"; point at the package's
        # entry module so the path can actually be matched.
        gold_files=["src/flask/wrappers.py", "src/flask/json/__init__.py"],
        gold_symbols=["json"],
        notes="JSON body accessor on Request",
    ),
    BenchmarkQuery(
        id="flask-id-05",
        query="g",
        query_type=QueryType.IDENTIFIER_LOOKUP,
        gold_files=["src/flask/globals.py", "src/flask/ctx.py"],
        gold_symbols=["g"],
        notes="request-scoped global namespace",
    ),
    BenchmarkQuery(
        id="flask-id-06",
        query="current_app",
        query_type=QueryType.IDENTIFIER_LOOKUP,
        gold_files=["src/flask/globals.py", "src/flask/ctx.py"],
        gold_symbols=["current_app"],
        notes="proxy to the active Flask application",
    ),

    # ----------------------------------------------------------------- #
    # SEMANTIC_QUESTION (11)
    # ----------------------------------------------------------------- #
    BenchmarkQuery(
        id="flask-sem-01",
        query="How does Flask route an incoming HTTP request to a view function?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/app.py", "src/flask/sansio/app.py"],
        gold_symbols=["dispatch_request", "full_dispatch_request", "url_map"],
        notes="request dispatch flow",
    ),
    BenchmarkQuery(
        id="flask-sem-02",
        query="How does the Flask test client work?",
        query_type=QueryType.SEMANTIC_QUESTION,
        # Was "src/flask/test.py", which does not exist in Flask. The
        # ``test_client`` gold symbol is a method of ``Flask`` in app.py.
        gold_files=["src/flask/testing.py", "src/flask/app.py"],
        gold_symbols=["FlaskClient", "test_client"],
        notes="test client wrapper around Werkzeug",
    ),
    BenchmarkQuery(
        id="flask-sem-03",
        query="How does Flask handle URL building with url_for?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/helpers.py", "src/flask/app.py"],
        gold_symbols=["url_for"],
        notes="URL builder flow",
    ),
    BenchmarkQuery(
        id="flask-sem-04",
        query="How does Flask manage the application context and request context?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/ctx.py", "src/flask/globals.py"],
        gold_symbols=["AppContext", "RequestContext", "push", "pop"],
        notes="context lifecycle",
    ),
    BenchmarkQuery(
        id="flask-sem-05",
        query="How does Flask register blueprints?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/blueprints.py", "src/flask/sansio/blueprints.py", "src/flask/app.py"],
        gold_symbols=["Blueprint", "register_blueprint"],
        notes="blueprint registration",
    ),
    BenchmarkQuery(
        id="flask-sem-06",
        query="How does Flask parse and validate configuration from environment variables?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/config.py"],
        gold_symbols=["Config", "from_envvar", "from_prefixed_env"],
        notes="config loading",
    ),
    BenchmarkQuery(
        id="flask-sem-07",
        query="How does Flask serialise JSON responses?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/json/provider.py", "src/flask/json/__init__.py"],
        gold_symbols=["DefaultJSONProvider", "jsonify"],
        notes="JSON provider abstraction",
    ),
    BenchmarkQuery(
        id="flask-sem-08",
        query="How does Flask handle sessions and signed cookies?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/sessions.py"],
        gold_symbols=["SecureCookieSessionInterface", "SessionInterface"],
        notes="session interface",
    ),
    BenchmarkQuery(
        id="flask-sem-09",
        query="How does Flask implement template rendering with Jinja2?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/templating.py", "src/flask/app.py"],
        gold_symbols=["render_template", "render_template_string", "create_jinja_environment"],
        notes="template rendering",
    ),
    BenchmarkQuery(
        id="flask-sem-10",
        query="How does Flask handle errors and register custom error handlers?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/app.py", "src/flask/sansio/app.py"],
        gold_symbols=["register_error_handler", "handle_exception", "handle_http_exception"],
        notes="error handler registration",
    ),
    BenchmarkQuery(
        id="flask-sem-11",
        query="How does Flask implement the before_request and after_request hooks?",
        query_type=QueryType.SEMANTIC_QUESTION,
        gold_files=["src/flask/app.py", "src/flask/sansio/scaffold.py"],
        gold_symbols=["before_request", "after_request", "teardown_request"],
        notes="request lifecycle hooks",
    ),

    # ----------------------------------------------------------------- #
    # ARCHITECTURE_QUERY (5)
    # ----------------------------------------------------------------- #
    BenchmarkQuery(
        id="flask-arch-01",
        query="Show me the module structure of the Flask application class",
        query_type=QueryType.ARCHITECTURE_QUERY,
        gold_files=["src/flask/app.py", "src/flask/sansio/app.py"],
        gold_symbols=["Flask", "App"],
        notes="App class layout",
    ),
    BenchmarkQuery(
        id="flask-arch-02",
        query="What is the architecture of the Flask request and response wrappers?",
        query_type=QueryType.ARCHITECTURE_QUERY,
        gold_files=["src/flask/wrappers.py"],
        gold_symbols=["Request", "Response"],
        notes="wrapper class hierarchy",
    ),
    BenchmarkQuery(
        id="flask-arch-03",
        query="Describe the class hierarchy of the Flask session interfaces",
        query_type=QueryType.ARCHITECTURE_QUERY,
        gold_files=["src/flask/sessions.py"],
        gold_symbols=["SessionInterface", "SecureCookieSessionInterface", "SessionMixin"],
        notes="session interface inheritance",
    ),
    BenchmarkQuery(
        id="flask-arch-04",
        query="What is the module layout of the Flask JSON package?",
        query_type=QueryType.ARCHITECTURE_QUERY,
        gold_files=["src/flask/json/__init__.py", "src/flask/json/provider.py"],
        gold_symbols=["provider", "JSONProvider"],
        notes="json subpackage layout",
    ),
    BenchmarkQuery(
        id="flask-arch-05",
        query="Show me the directory structure of the Flask templating subsystem",
        query_type=QueryType.ARCHITECTURE_QUERY,
        gold_files=["src/flask/templating.py", "src/flask/app.py"],
        gold_symbols=["Environment", "DispatchingJinjaLoader", "render_template"],
        notes="templating subsystem structure",
    ),

    # ----------------------------------------------------------------- #
    # BUG_DIAGNOSIS (4)
    # ----------------------------------------------------------------- #
    BenchmarkQuery(
        id="flask-bug-01",
        query="AttributeError: 'Flask' object has no attribute 'before_first_request' on Flask 3.x",
        query_type=QueryType.BUG_DIAGNOSIS,
        gold_files=["src/flask/app.py"],
        gold_symbols=["before_first_request"],
        notes="removed in Flask 3.x; users must use before_request",
    ),
    BenchmarkQuery(
        id="flask-bug-02",
        query="RuntimeError: Working outside of application context when accessing current_app",
        query_type=QueryType.BUG_DIAGNOSIS,
        gold_files=["src/flask/ctx.py", "src/flask/globals.py"],
        gold_symbols=["AppContext", "current_app", "push"],
        notes="context not pushed",
    ),
    BenchmarkQuery(
        id="flask-bug-03",
        query="ImportError: cannot import name 'flask' from 'flask' — circular import",
        query_type=QueryType.BUG_DIAGNOSIS,
        gold_files=["src/flask/__init__.py"],
        gold_symbols=[],
        notes="circular import in __init__",
    ),
    BenchmarkQuery(
        id="flask-bug-04",
        query="TypeError: __init__() got an unexpected keyword argument 'subdomain_matching' when creating Flask app",
        query_type=QueryType.BUG_DIAGNOSIS,
        gold_files=["src/flask/app.py", "src/flask/sansio/app.py"],
        gold_symbols=["Flask", "__init__"],
        notes="argument removed/renamed across versions",
    ),

    # ----------------------------------------------------------------- #
    # CODE_COMPLETION (4)
    # ----------------------------------------------------------------- #
    BenchmarkQuery(
        id="flask-cc-01",
        query="Complete a Flask route that returns JSON for a GET /api/users endpoint",
        query_type=QueryType.CODE_COMPLETION,
        gold_files=["src/flask/app.py", "src/flask/json/__init__.py"],
        gold_symbols=["route", "jsonify"],
        notes="route + jsonify",
    ),
    BenchmarkQuery(
        id="flask-cc-02",
        query="Implement a Flask before_request hook that loads the current user from a session cookie",
        query_type=QueryType.CODE_COMPLETION,
        gold_files=["src/flask/app.py", "src/flask/ctx.py", "src/flask/sessions.py"],
        gold_symbols=["before_request", "session"],
        notes="auth hook",
    ),
    BenchmarkQuery(
        id="flask-cc-03",
        query="Write a Flask blueprint that registers a JSON error handler for 404",
        query_type=QueryType.CODE_COMPLETION,
        gold_files=["src/flask/blueprints.py", "src/flask/sansio/blueprints.py"],
        gold_symbols=["Blueprint", "register_error_handler", "jsonify"],
        notes="blueprint error handler",
    ),
    BenchmarkQuery(
        id="flask-cc-04",
        query="Complete a Flask CLI command that initialises the database",
        query_type=QueryType.CODE_COMPLETION,
        gold_files=["src/flask/cli.py", "src/flask/app.py"],
        gold_symbols=["AppGroup", "command", "with_appcontext"],
        notes="CLI command with appcontext",
    ),
]
313
+
314
+
315
+ # --------------------------------------------------------------------------- #
316
+ # Convenience accessors
317
+ # --------------------------------------------------------------------------- #
318
def get_benchmark() -> list[BenchmarkQuery]:
    """Return a new list containing every query in ``FLASK_BENCHMARK``.

    The list is a shallow copy, so callers may reorder or trim it without
    affecting the module-level benchmark.
    """
    return [*FLASK_BENCHMARK]
321
+
322
+
323
def get_queries_by_type(query_type: QueryType) -> list[BenchmarkQuery]:
    """Return the benchmark queries whose ``query_type`` equals ``query_type``.

    Benchmark order is preserved; the result is empty when nothing matches.
    """
    matches: list[BenchmarkQuery] = []
    for entry in FLASK_BENCHMARK:
        if entry.query_type == query_type:
            matches.append(entry)
    return matches
326
+
327
+
328
def benchmark_summary() -> dict[str, int]:
    """Count the benchmark queries per query-type value.

    Returns:
        A mapping of each ``query_type.value`` to its number of queries,
        plus a ``"total"`` key holding the overall query count.
    """
    counts: dict[str, int] = {}
    for entry in FLASK_BENCHMARK:
        label = entry.query_type.value
        counts.setdefault(label, 0)
        counts[label] += 1
    # Written last so it reflects the whole benchmark.
    counts["total"] = len(FLASK_BENCHMARK)
    return counts
@@ -0,0 +1,42 @@
1
+ """Django benchmark dataset — 30 queries with verified gold files."""
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass, field
4
+
5
@dataclass
class BenchmarkQuery:
    """A single Django benchmark query and its gold annotations."""

    # Short query identifier, e.g. "d01".
    qid: str
    # Query text handed to the engine.
    query: str
    # Query category name, e.g. "identifier_lookup".
    query_type: str
    # Repository-relative paths of the files considered relevant.
    relevant_files: list[str]
    # Keywords associated with the query; defaults to an empty list.
    relevant_keywords: list[str] = field(default_factory=list)
8
+
9
# Django benchmark queries, grouped by query type. Positional arguments are
# (qid, query, query_type, relevant_files, relevant_keywords).
DATASET = [
    # identifier_lookup (d01-d06)
    BenchmarkQuery("d01","QuerySet","identifier_lookup",["django/db/models/query.py"],["QuerySet"]),
    BenchmarkQuery("d02","ModelBase","identifier_lookup",["django/db/models/base.py"],["ModelBase","Model"]),
    BenchmarkQuery("d03","HttpRequest","identifier_lookup",["django/http/request.py"],["HttpRequest"]),
    BenchmarkQuery("d04","WSGIHandler","identifier_lookup",["django/core/handlers/wsgi.py"],["WSGIHandler"]),
    BenchmarkQuery("d05","BaseForm","identifier_lookup",["django/forms/forms.py"],["BaseForm","Form"]),
    BenchmarkQuery("d06","URLResolver","identifier_lookup",["django/urls/resolvers.py"],["URLResolver","URLPattern"]),
    # semantic_question (d07-d17)
    BenchmarkQuery("d07","how does the ORM QuerySet work","semantic_question",["django/db/models/query.py"],["QuerySet","filter","iterator"]),
    BenchmarkQuery("d08","how does the URL resolver match patterns","semantic_question",["django/urls/resolvers.py"],["URLResolver","resolve","match"]),
    BenchmarkQuery("d09","how does Django process WSGI requests","semantic_question",["django/core/handlers/wsgi.py"],["WSGIHandler","WSGIRequest"]),
    BenchmarkQuery("d10","how does the template engine parse and render","semantic_question",["django/template/base.py"],["Template","Token","Lexer"]),
    BenchmarkQuery("d11","how does Django handle CSRF protection","semantic_question",["django/middleware/csrf.py"],["CSRF","token","middleware"]),
    BenchmarkQuery("d12","how does the password hashing work","semantic_question",["django/contrib/auth/hashers.py"],["check_password","make_password","hasher"]),
    BenchmarkQuery("d13","how does Django send email","semantic_question",["django/core/mail/__init__.py"],["send_mail","EmailMessage"]),
    BenchmarkQuery("d14","how do database transactions work","semantic_question",["django/db/transaction.py"],["atomic","commit","rollback"]),
    BenchmarkQuery("d15","how does form validation work","semantic_question",["django/forms/forms.py"],["BaseForm","is_valid","clean"]),
    BenchmarkQuery("d16","how does the session backend store data","semantic_question",["django/contrib/sessions/backends/db.py"],["SessionStore","SessionBase"]),
    BenchmarkQuery("d17","how does Django load templates","semantic_question",["django/template/loader.py"],["get_template","render_to_string"]),
    # architecture_query (d18-d22)
    BenchmarkQuery("d18","what is the structure of the auth models","architecture_query",["django/contrib/auth/models.py"],["User","Group","Permission"]),
    BenchmarkQuery("d19","what are the HTTP response classes","architecture_query",["django/http/response.py"],["HttpResponse","JsonResponse"]),
    BenchmarkQuery("d20","what validators does Django provide","architecture_query",["django/core/validators.py"],["RegexValidator","URLValidator"]),
    BenchmarkQuery("d21","what field types does the ORM support","architecture_query",["django/db/models/fields/__init__.py"],["CharField","IntegerField"]),
    BenchmarkQuery("d22","what is the management command structure","architecture_query",["django/core/management/base.py"],["BaseCommand","CommandError"]),
    # bug_diagnosis (d23-d26)
    BenchmarkQuery("d23","TransactionManagementError atomic block","bug_diagnosis",["django/db/transaction.py"],["TransactionManagementError"]),
    BenchmarkQuery("d24","VariableDoesNotExist template rendering error","bug_diagnosis",["django/template/base.py"],["VariableDoesNotExist"]),
    BenchmarkQuery("d25","BadHeaderError HttpResponse header injection","bug_diagnosis",["django/http/response.py"],["BadHeaderError"]),
    BenchmarkQuery("d26","CommandError management command failure","bug_diagnosis",["django/core/management/base.py"],["CommandError","CommandParser"]),
    # code_completion (d27-d30)
    BenchmarkQuery("d27","models.ForeignKey(","code_completion",["django/db/models/fields/related.py"],["ForeignKey"]),
    BenchmarkQuery("d28","forms.ModelForm(","code_completion",["django/forms/models.py"],["ModelForm"]),
    BenchmarkQuery("d29","transaction.atomic(","code_completion",["django/db/transaction.py"],["atomic"]),
    BenchmarkQuery("d30","send_mail(","code_completion",["django/core/mail/__init__.py"],["send_mail"]),
]
41
+
42
def load_dataset():
    """Return a new list holding the entries of ``DATASET`` in order.

    The copy is shallow: the list is fresh, the query objects are shared.
    """
    return [*DATASET]
@@ -0,0 +1,42 @@
1
+ """Flask benchmark dataset — 30 queries."""
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass, field
4
+
5
@dataclass
class BenchmarkQuery:
    """A single Flask benchmark query and its gold annotations."""

    # Short query identifier, e.g. "f01".
    qid: str
    # Query text handed to the engine.
    query: str
    # Query category name, e.g. "identifier_lookup".
    query_type: str
    # Repository-relative paths of the files considered relevant.
    relevant_files: list[str]
    # Keywords associated with the query; defaults to an empty list.
    relevant_keywords: list[str] = field(default_factory=list)
8
+
9
# Flask benchmark queries, grouped by query type. Positional arguments are
# (qid, query, query_type, relevant_files, relevant_keywords).
DATASET = [
    # identifier_lookup (f01-f06)
    BenchmarkQuery("f01","Flask","identifier_lookup",["src/flask/app.py","src/flask/__init__.py"],["Flask"]),
    BenchmarkQuery("f02","Blueprint","identifier_lookup",["src/flask/blueprints.py","src/flask/sansio/blueprints.py"],["Blueprint"]),
    BenchmarkQuery("f03","request","identifier_lookup",["src/flask/globals.py","src/flask/wrappers.py"],["request"]),
    BenchmarkQuery("f04","session","identifier_lookup",["src/flask/sessions.py","src/flask/globals.py"],["session"]),
    BenchmarkQuery("f05","current_app","identifier_lookup",["src/flask/globals.py","src/flask/ctx.py"],["current_app"]),
    BenchmarkQuery("f06","g","identifier_lookup",["src/flask/globals.py","src/flask/ctx.py"],["g"]),
    # semantic_question (f07-f17)
    BenchmarkQuery("f07","how does routing work in flask","semantic_question",["src/flask/sansio/scaffold.py","src/flask/app.py"],["route","url_map"]),
    BenchmarkQuery("f08","how does the request context get pushed and popped","semantic_question",["src/flask/ctx.py"],["RequestContext","push","pop"]),
    BenchmarkQuery("f09","how are templates rendered","semantic_question",["src/flask/templating.py","src/flask/helpers.py"],["render_template"]),
    BenchmarkQuery("f10","how does the session cookie get signed and validated","semantic_question",["src/flask/sessions.py"],["SecureCookieSessionInterface","secret_key"]),
    BenchmarkQuery("f11","how are url converters and rules defined","semantic_question",["src/flask/sansio/scaffold.py"],["url_rule","converter","url_map"]),
    BenchmarkQuery("f12","how does flask handle json serialization and deserialization","semantic_question",["src/flask/json/__init__.py","src/flask/json/provider.py"],["JSONProvider","jsonify"]),
    BenchmarkQuery("f13","how does the cli command registration work","semantic_question",["src/flask/cli.py"],["FlaskGroup","AppGroup","command"]),
    BenchmarkQuery("f14","how are errors and http exceptions handled","semantic_question",["src/flask/app.py","src/flask/helpers.py"],["errorhandler","HTTPException","abort"]),
    BenchmarkQuery("f15","how does flask read configuration from environment and files","semantic_question",["src/flask/config.py"],["Config","from_object","from_envvar"]),
    BenchmarkQuery("f16","how do before and after request hooks work","semantic_question",["src/flask/sansio/scaffold.py","src/flask/app.py"],["before_request","after_request"]),
    BenchmarkQuery("f17","how does the test client make requests","semantic_question",["src/flask/testing.py"],["FlaskClient","test_client"]),
    # architecture_query (f18-f22)
    BenchmarkQuery("f18","what is the overall architecture of the flask package","architecture_query",["src/flask/__init__.py","src/flask/app.py"],[]),
    BenchmarkQuery("f19","what are the main building blocks of flask sansio modules","architecture_query",["src/flask/sansio/scaffold.py","src/flask/sansio/app.py","src/flask/sansio/blueprints.py"],[]),
    BenchmarkQuery("f20","Request Response Response body wrappers","architecture_query",["src/flask/wrappers.py"],["Request","Response"]),
    BenchmarkQuery("f21","what classes are in the json module","architecture_query",["src/flask/json/__init__.py","src/flask/json/provider.py"],["JSONProvider"]),
    BenchmarkQuery("f22","AppContext RequestContext context hierarchy","architecture_query",["src/flask/ctx.py"],["AppContext","RequestContext"]),
    # bug_diagnosis (f23-f26)
    BenchmarkQuery("f23","BadRequestKeyError 400 bad request key not found","bug_diagnosis",["src/flask/debughelpers.py","src/flask/sansio/scaffold.py"],["BadRequestKeyError"]),
    BenchmarkQuery("f24","DebugFilesPayload files missing error","bug_diagnosis",["src/flask/debughelpers.py"],["DebugFilesPayload"]),
    BenchmarkQuery("f25","AssertionError view function endpoint mapping required","bug_diagnosis",["src/flask/sansio/scaffold.py","src/flask/debughelpers.py"],["AssertionError","endpoint"]),
    BenchmarkQuery("f26","RuntimeError working outside of application context","bug_diagnosis",["src/flask/ctx.py","src/flask/globals.py"],["RuntimeError","application context"]),
    # code_completion (f27-f30)
    BenchmarkQuery("f27","app.route(","code_completion",["src/flask/sansio/scaffold.py","src/flask/app.py"],["route"]),
    BenchmarkQuery("f28","render_template(","code_completion",["src/flask/templating.py"],["render_template"]),
    BenchmarkQuery("f29","app.config.from_object(","code_completion",["src/flask/config.py"],["Config","from_object"]),
    BenchmarkQuery("f30","session.permanent(","code_completion",["src/flask/sessions.py"],["SessionMixin","permanent"]),
]
41
+
42
def load_dataset():
    """Return a new list holding the entries of ``DATASET`` in order.

    The copy is shallow: the list is fresh, the query objects are shared.
    """
    return [*DATASET]
@@ -0,0 +1,26 @@
1
+ """IR metrics: Recall@k, MRR, nDCG, Context F1."""
2
+ import math
3
+
4
def recall_at_k(retrieved, relevant, k):
    """Return the fraction of distinct relevant items found in the top ``k``.

    Args:
        retrieved: Ranked sequence of retrieved items, best first.
        relevant: Iterable of gold items; duplicates count once.
        k: Number of leading retrieved items to consider.

    Returns:
        A float in ``[0.0, 1.0]``; ``0.0`` when ``relevant`` is empty.
    """
    # De-duplicate the gold items so the denominator matches the set-based
    # numerator; otherwise a repeated gold entry makes 1.0 unreachable.
    gold = set(relevant)
    if not gold:
        return 0.0
    return len(gold.intersection(retrieved[:k])) / len(gold)
7
+
8
def mrr(retrieved, relevant):
    """Return the reciprocal rank of the first relevant retrieved item.

    Args:
        retrieved: Ranked iterable of retrieved items, best first.
        relevant: Iterable of gold items.

    Returns:
        ``1.0 / rank`` (1-based) of the first item of ``retrieved`` that is
        in ``relevant``, or ``0.0`` when none is.
    """
    # Build the lookup set once instead of once per retrieved item.
    gold = set(relevant)
    for rank, item in enumerate(retrieved, 1):
        if item in gold:
            return 1.0 / rank
    return 0.0
12
+
13
def ndcg_at_k(retrieved, relevant, k):
    """Return binary-relevance nDCG over the top ``k`` retrieved items.

    Each distinct relevant item earns gain only at its first occurrence in
    ``retrieved``; later repeats still occupy a rank but score nothing, so
    the result never exceeds ``1.0``.

    Args:
        retrieved: Ranked sequence of retrieved items, best first.
        relevant: Iterable of gold items; duplicates count once.
        k: Number of leading retrieved items to consider.

    Returns:
        DCG divided by the ideal DCG, or ``0.0`` when the ideal DCG is zero
        (no relevant items, or ``k`` of zero).
    """
    gold = set(relevant)
    credited = set()
    dcg = 0.0
    for rank, item in enumerate(retrieved[:k], 1):
        if item in gold and item not in credited:
            credited.add(item)
            dcg += 1.0 / math.log2(rank + 1)
    # The ideal ranking places every distinct gold item first, capped at k.
    ideal_hits = min(len(gold), k)
    idcg = sum(1.0 / math.log2(rank + 1) for rank in range(1, ideal_hits + 1))
    return dcg / idcg if idcg > 0 else 0.0
20
+
21
def context_f1(retrieved, gold, k=10):
    """Compute set-based recall, precision and F1 over the top ``k`` items.

    Args:
        retrieved: Ranked sequence of retrieved items, best first.
        gold: Iterable of gold items.
        k: Number of leading retrieved items to consider.

    Returns:
        A dict with ``"recall"``, ``"precision"`` and ``"f1"`` floats; all
        three are ``0.0`` when ``gold`` is empty.
    """
    gold_set = set(gold)
    top = set(retrieved[:k])
    if not gold_set:
        return {"recall": 0.0, "precision": 0.0, "f1": 0.0}

    overlap = len(gold_set & top)
    recall = overlap / len(gold_set)
    # An empty top-k has no defined precision; report it as zero.
    if top:
        precision = overlap / len(top)
    else:
        precision = 0.0

    if recall + precision > 0:
        f1 = 2*recall*precision/(recall+precision)
    else:
        f1 = 0.0
    return {"recall": recall, "precision": precision, "f1": f1}