kinetic-context 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kce/__init__.py +5 -0
- kce/_version.py +1 -0
- kce/assembly/__init__.py +2 -0
- kce/assembly/context_builder.py +41 -0
- kce/benchmark/__init__.py +2 -0
- kce/benchmark/dataset.py +334 -0
- kce/benchmark/django_dataset.py +42 -0
- kce/benchmark/flask_dataset.py +42 -0
- kce/benchmark/metrics.py +26 -0
- kce/benchmark/runner.py +275 -0
- kce/cli.py +568 -0
- kce/config.py +120 -0
- kce/coordinator/__init__.py +2 -0
- kce/coordinator/intent.py +95 -0
- kce/coordinator/query_coordinator.py +59 -0
- kce/coordinator/query_expansion.py +68 -0
- kce/coordinator/query_transform.py +41 -0
- kce/coordinator/symbol_lookup.py +32 -0
- kce/embeddings/__init__.py +2 -0
- kce/embeddings/codestral.py +73 -0
- kce/engine.py +419 -0
- kce/graph/__init__.py +2 -0
- kce/graph/ckg.py +136 -0
- kce/incremental/__init__.py +2 -0
- kce/incremental/time_travel.py +145 -0
- kce/incremental/update.py +47 -0
- kce/ingestion/__init__.py +2 -0
- kce/ingestion/chunker.py +109 -0
- kce/ingestion/discovery.py +51 -0
- kce/ingestion/parser.py +121 -0
- kce/ingestion/summarizer.py +78 -0
- kce/mcp_server.py +321 -0
- kce/neuro_symbolic/__init__.py +2 -0
- kce/neuro_symbolic/loop.py +24 -0
- kce/ranking/__init__.py +2 -0
- kce/ranking/reranker.py +58 -0
- kce/ranking/unified_reranker.py +104 -0
- kce/retrieval/__init__.py +2 -0
- kce/retrieval/bm25.py +44 -0
- kce/retrieval/dense.py +40 -0
- kce/retrieval/graph_retrieval.py +70 -0
- kce/retrieval/novel_signals.py +221 -0
- kce/retrieval/rrf.py +18 -0
- kce/store/__init__.py +2 -0
- kce/store/index_store.py +57 -0
- kce/store/registry.py +256 -0
- kce/store/vector_store.py +141 -0
- kinetic_context-0.2.1.dist-info/METADATA +348 -0
- kinetic_context-0.2.1.dist-info/RECORD +53 -0
- kinetic_context-0.2.1.dist-info/WHEEL +5 -0
- kinetic_context-0.2.1.dist-info/entry_points.txt +3 -0
- kinetic_context-0.2.1.dist-info/licenses/LICENSE +21 -0
- kinetic_context-0.2.1.dist-info/top_level.txt +1 -0
kce/__init__.py
ADDED
kce/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.1"
|
kce/assembly/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Budget-aware hierarchical context assembly."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from ..config import KCEConfig
|
|
5
|
+
from ..ingestion.chunker import CodeChunk, estimate_tokens
|
|
6
|
+
|
|
7
|
+
@dataclass
class ContextPackage:
    """Assembled prompt context together with the pieces it was built from.

    Instances are created by ``ContextAssembler.assemble``; the class itself
    is a plain record with no behaviour of its own.
    """

    # Caller-supplied system prompt, stored unchanged.
    system_prompt: str
    # Repository map text, stored unchanged.
    repo_map: str
    # Sorted relative paths of every file contributing a selected chunk.
    relevant_files: list[str]
    # Chunks that fit inside the primary budget, in ranked order.
    primary_chunks: list[CodeChunk]
    # Chunks that overflowed the primary budget but fit the supporting one.
    supporting_chunks: list[CodeChunk]
    # The fully rendered, tagged context string.
    full_text: str
    # Estimated token count of ``full_text``.
    token_count: int
    # Tokens consumed per section (``"primary"`` / ``"supporting"``).
    budget_used: dict = field(default_factory=dict)
|
|
12
|
+
|
|
13
|
+
class ContextAssembler:
    """Pack ranked code chunks into one tagged context string under a token budget."""

    def __init__(self, config):
        # ``config`` must expose ``context_budget_tokens`` (read in ``assemble``).
        self.config = config

    def assemble(self, ranked_chunks, repo_map, file_summaries, system_prompt=""):
        """Build a :class:`ContextPackage` from already-ranked chunks.

        Chunks are de-duplicated, then greedily placed in ranked order: a
        chunk goes to the primary section if it still fits there, otherwise
        to the supporting section if it fits there, otherwise it is dropped.

        Args:
            ranked_chunks: Chunks ordered best-first.
            repo_map: Repository map text, embedded verbatim.
            file_summaries: Mapping of relative path to summary text.
            system_prompt: Optional prompt, embedded verbatim.

        Returns:
            ContextPackage: the rendered text plus the selected chunks.
        """
        total_budget = self.config.context_budget_tokens
        # Only the "primary" and "supporting" shares are enforced below.
        budgets = {
            "system": int(total_budget * 0.05),
            "repo": int(total_budget * 0.08),
            "files": int(total_budget * 0.12),
            "primary": int(total_budget * 0.60),
            "supporting": int(total_budget * 0.15),
        }

        primary, supporting = [], []
        primary_tokens = 0
        supporting_tokens = 0
        for chunk in self._dedup(ranked_chunks):
            cost = estimate_tokens(chunk.content)
            if primary_tokens + cost <= budgets["primary"]:
                primary.append(chunk)
                primary_tokens += cost
                continue
            if supporting_tokens + cost <= budgets["supporting"]:
                supporting.append(chunk)
                supporting_tokens += cost

        selected_paths = {chunk.rel_path for chunk in primary + supporting}
        summary_sections = []
        for path in sorted(selected_paths):
            summary = file_summaries.get(path, '(no summary)')
            summary_sections.append(f"## {path}\n{summary}")
        rel_text = "\n".join(summary_sections)

        sections = [f"<system_prompt>\n{system_prompt}\n</system_prompt>\n"]
        sections.extend(
            f'<code_chunk role="primary" file="{chunk.rel_path}" '
            f'lines="{chunk.start_line}-{chunk.end_line}">\n{chunk.content}\n</code_chunk>'
            for chunk in primary
        )
        sections.extend(
            f'<code_chunk role="supporting" file="{chunk.rel_path}">\n{chunk.content}\n</code_chunk>'
            for chunk in supporting
        )
        sections.append(
            f"<relevant_files>\n{rel_text}\n</relevant_files>\n<repo_map>\n{repo_map}\n</repo_map>"
        )
        full_text = "\n\n".join(sections)

        return ContextPackage(
            system_prompt=system_prompt,
            repo_map=repo_map,
            relevant_files=sorted(selected_paths),
            primary_chunks=primary,
            supporting_chunks=supporting,
            full_text=full_text,
            token_count=estimate_tokens(full_text),
            budget_used={"primary": primary_tokens, "supporting": supporting_tokens},
        )

    def _dedup(self, chunks):
        """Drop repeated chunks, keeping the first occurrence in input order.

        A chunk counts as a repeat when its ``chunk_id`` was already kept, or
        when its ``(rel_path, start_byte, end_byte)`` span was already kept.
        """
        kept_ids = set()
        kept_spans = set()
        unique = []
        for chunk in chunks:
            if chunk.chunk_id in kept_ids:
                continue
            span = (chunk.rel_path, chunk.start_byte, chunk.end_byte)
            if span in kept_spans:
                continue
            kept_ids.add(chunk.chunk_id)
            kept_spans.add(span)
            unique.append(chunk)
        return unique
|
kce/benchmark/dataset.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""Flask benchmark — 30 queries targeting real Flask source files.
|
|
2
|
+
|
|
3
|
+
The benchmark is split across the five :class:`QueryType` categories
|
|
4
|
+
to match the distribution KCE was tuned on:
|
|
5
|
+
|
|
6
|
+
* 6 IDENTIFIER_LOOKUP
|
|
7
|
+
* 11 SEMANTIC_QUESTION
|
|
8
|
+
* 5 ARCHITECTURE_QUERY
|
|
9
|
+
* 4 BUG_DIAGNOSIS
|
|
10
|
+
* 4 CODE_COMPLETION
|
|
11
|
+
|
|
12
|
+
Each query carries:
|
|
13
|
+
|
|
14
|
+
* ``query`` — natural-language question
|
|
15
|
+
* ``query_type`` — expected QueryType
|
|
16
|
+
* ``gold_files`` — list of Flask source files (POSIX rel paths)
|
|
17
|
+
that a correct answer must surface
|
|
18
|
+
* ``gold_symbols`` — optional list of symbol names that should
|
|
19
|
+
appear in the retrieved chunks (used for
|
|
20
|
+
finer-grained symbol-level evaluation)
|
|
21
|
+
* ``notes`` — free-text annotation
|
|
22
|
+
|
|
23
|
+
The gold files are paths inside the Flask repository
|
|
24
|
+
(``https://github.com/pallets/flask``); they are stable across the
|
|
25
|
+
3.x line. Where multiple files are equally correct we list them all
|
|
26
|
+
and recall is computed as "fraction of gold found".
|
|
27
|
+
"""
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
from dataclasses import dataclass, field
|
|
31
|
+
from typing import Optional
|
|
32
|
+
|
|
33
|
+
from ..coordinator.query_coordinator import QueryType
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# --------------------------------------------------------------------------- #
|
|
37
|
+
# Container
|
|
38
|
+
# --------------------------------------------------------------------------- #
|
|
39
|
+
@dataclass
class BenchmarkQuery:
    """A single benchmark query and the gold answers it is scored against."""

    # Query text: a natural-language question or a bare identifier.
    query: str
    # Category the query is expected to be classified as.
    query_type: QueryType
    # POSIX relative paths of source files a correct answer must surface.
    gold_files: list[str] = field(default_factory=list)
    # Symbol names expected in the retrieved chunks (may be empty).
    gold_symbols: list[str] = field(default_factory=list)
    # Free-text annotation.
    notes: str = ""
    # Query identifier, e.g. "flask-id-01".
    id: str = ""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# --------------------------------------------------------------------------- #
|
|
50
|
+
# The 30 queries
|
|
51
|
+
# --------------------------------------------------------------------------- #
|
|
52
|
+
FLASK_BENCHMARK: list[BenchmarkQuery] = [
|
|
53
|
+
# ----------------------------------------------------------------- #
|
|
54
|
+
# IDENTIFIER_LOOKUP (6)
|
|
55
|
+
# ----------------------------------------------------------------- #
|
|
56
|
+
BenchmarkQuery(
|
|
57
|
+
id="flask-id-01",
|
|
58
|
+
query="app.config.from_object",
|
|
59
|
+
query_type=QueryType.IDENTIFIER_LOOKUP,
|
|
60
|
+
gold_files=["src/flask/config.py"],
|
|
61
|
+
gold_symbols=["from_object"],
|
|
62
|
+
notes="direct config loader",
|
|
63
|
+
),
|
|
64
|
+
BenchmarkQuery(
|
|
65
|
+
id="flask-id-02",
|
|
66
|
+
query="Flask.run",
|
|
67
|
+
query_type=QueryType.IDENTIFIER_LOOKUP,
|
|
68
|
+
gold_files=["src/flask/app.py"],
|
|
69
|
+
gold_symbols=["run"],
|
|
70
|
+
notes="dev server entrypoint",
|
|
71
|
+
),
|
|
72
|
+
BenchmarkQuery(
|
|
73
|
+
id="flask-id-03",
|
|
74
|
+
query="url_for",
|
|
75
|
+
query_type=QueryType.IDENTIFIER_LOOKUP,
|
|
76
|
+
gold_files=["src/flask/helpers.py", "src/flask/app.py"],
|
|
77
|
+
gold_symbols=["url_for"],
|
|
78
|
+
notes="URL builder helper",
|
|
79
|
+
),
|
|
80
|
+
BenchmarkQuery(
|
|
81
|
+
id="flask-id-04",
|
|
82
|
+
query="request.json",
|
|
83
|
+
query_type=QueryType.IDENTIFIER_LOOKUP,
|
|
84
|
+
gold_files=["src/flask/wrappers.py", "src/flask/json"],
|
|
85
|
+
gold_symbols=["json"],
|
|
86
|
+
notes="JSON body accessor on Request",
|
|
87
|
+
),
|
|
88
|
+
BenchmarkQuery(
|
|
89
|
+
id="flask-id-05",
|
|
90
|
+
query="g",
|
|
91
|
+
query_type=QueryType.IDENTIFIER_LOOKUP,
|
|
92
|
+
gold_files=["src/flask/globals.py", "src/flask/ctx.py"],
|
|
93
|
+
gold_symbols=["g"],
|
|
94
|
+
notes="request-scoped global namespace",
|
|
95
|
+
),
|
|
96
|
+
BenchmarkQuery(
|
|
97
|
+
id="flask-id-06",
|
|
98
|
+
query="current_app",
|
|
99
|
+
query_type=QueryType.IDENTIFIER_LOOKUP,
|
|
100
|
+
gold_files=["src/flask/globals.py", "src/flask/ctx.py"],
|
|
101
|
+
gold_symbols=["current_app"],
|
|
102
|
+
notes="proxy to the active Flask application",
|
|
103
|
+
),
|
|
104
|
+
|
|
105
|
+
# ----------------------------------------------------------------- #
|
|
106
|
+
# SEMANTIC_QUESTION (11)
|
|
107
|
+
# ----------------------------------------------------------------- #
|
|
108
|
+
BenchmarkQuery(
|
|
109
|
+
id="flask-sem-01",
|
|
110
|
+
query="How does Flask route an incoming HTTP request to a view function?",
|
|
111
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
112
|
+
gold_files=["src/flask/app.py", "src/flask/sansio/app.py"],
|
|
113
|
+
gold_symbols=["dispatch_request", "full_dispatch_request", "url_map"],
|
|
114
|
+
notes="request dispatch flow",
|
|
115
|
+
),
|
|
116
|
+
BenchmarkQuery(
|
|
117
|
+
id="flask-sem-02",
|
|
118
|
+
query="How does the Flask test client work?",
|
|
119
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
120
|
+
gold_files=["src/flask/testing.py", "src/flask/test.py"],
|
|
121
|
+
gold_symbols=["FlaskClient", "test_client"],
|
|
122
|
+
notes="test client wrapper around Werkzeug",
|
|
123
|
+
),
|
|
124
|
+
BenchmarkQuery(
|
|
125
|
+
id="flask-sem-03",
|
|
126
|
+
query="How does Flask handle URL building with url_for?",
|
|
127
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
128
|
+
gold_files=["src/flask/helpers.py", "src/flask/app.py"],
|
|
129
|
+
gold_symbols=["url_for"],
|
|
130
|
+
notes="URL builder flow",
|
|
131
|
+
),
|
|
132
|
+
BenchmarkQuery(
|
|
133
|
+
id="flask-sem-04",
|
|
134
|
+
query="How does Flask manage the application context and request context?",
|
|
135
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
136
|
+
gold_files=["src/flask/ctx.py", "src/flask/globals.py"],
|
|
137
|
+
gold_symbols=["AppContext", "RequestContext", "push", "pop"],
|
|
138
|
+
notes="context lifecycle",
|
|
139
|
+
),
|
|
140
|
+
BenchmarkQuery(
|
|
141
|
+
id="flask-sem-05",
|
|
142
|
+
query="How does Flask register blueprints?",
|
|
143
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
144
|
+
gold_files=["src/flask/blueprints.py", "src/flask/sansio/blueprints.py", "src/flask/app.py"],
|
|
145
|
+
gold_symbols=["Blueprint", "register_blueprint"],
|
|
146
|
+
notes="blueprint registration",
|
|
147
|
+
),
|
|
148
|
+
BenchmarkQuery(
|
|
149
|
+
id="flask-sem-06",
|
|
150
|
+
query="How does Flask parse and validate configuration from environment variables?",
|
|
151
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
152
|
+
gold_files=["src/flask/config.py"],
|
|
153
|
+
gold_symbols=["Config", "from_envvar", "from_prefixed_env"],
|
|
154
|
+
notes="config loading",
|
|
155
|
+
),
|
|
156
|
+
BenchmarkQuery(
|
|
157
|
+
id="flask-sem-07",
|
|
158
|
+
query="How does Flask serialise JSON responses?",
|
|
159
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
160
|
+
gold_files=["src/flask/json/provider.py", "src/flask/json/__init__.py"],
|
|
161
|
+
gold_symbols=["DefaultJSONProvider", "jsonify"],
|
|
162
|
+
notes="JSON provider abstraction",
|
|
163
|
+
),
|
|
164
|
+
BenchmarkQuery(
|
|
165
|
+
id="flask-sem-08",
|
|
166
|
+
query="How does Flask handle sessions and signed cookies?",
|
|
167
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
168
|
+
gold_files=["src/flask/sessions.py"],
|
|
169
|
+
gold_symbols=["SecureCookieSessionInterface", "SessionInterface"],
|
|
170
|
+
notes="session interface",
|
|
171
|
+
),
|
|
172
|
+
BenchmarkQuery(
|
|
173
|
+
id="flask-sem-09",
|
|
174
|
+
query="How does Flask implement template rendering with Jinja2?",
|
|
175
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
176
|
+
gold_files=["src/flask/templating.py", "src/flask/app.py"],
|
|
177
|
+
gold_symbols=["render_template", "render_template_string", "create_jinja_environment"],
|
|
178
|
+
notes="template rendering",
|
|
179
|
+
),
|
|
180
|
+
BenchmarkQuery(
|
|
181
|
+
id="flask-sem-10",
|
|
182
|
+
query="How does Flask handle errors and register custom error handlers?",
|
|
183
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
184
|
+
gold_files=["src/flask/app.py", "src/flask/sansio/app.py"],
|
|
185
|
+
gold_symbols=["register_error_handler", "handle_exception", "handle_http_exception"],
|
|
186
|
+
notes="error handler registration",
|
|
187
|
+
),
|
|
188
|
+
BenchmarkQuery(
|
|
189
|
+
id="flask-sem-11",
|
|
190
|
+
query="How does Flask implement the before_request and after_request hooks?",
|
|
191
|
+
query_type=QueryType.SEMANTIC_QUESTION,
|
|
192
|
+
gold_files=["src/flask/app.py", "src/flask/sansio/scaffold.py"],
|
|
193
|
+
gold_symbols=["before_request", "after_request", "teardown_request"],
|
|
194
|
+
notes="request lifecycle hooks",
|
|
195
|
+
),
|
|
196
|
+
|
|
197
|
+
# ----------------------------------------------------------------- #
|
|
198
|
+
# ARCHITECTURE_QUERY (5)
|
|
199
|
+
# ----------------------------------------------------------------- #
|
|
200
|
+
BenchmarkQuery(
|
|
201
|
+
id="flask-arch-01",
|
|
202
|
+
query="Show me the module structure of the Flask application class",
|
|
203
|
+
query_type=QueryType.ARCHITECTURE_QUERY,
|
|
204
|
+
gold_files=["src/flask/app.py", "src/flask/sansio/app.py"],
|
|
205
|
+
gold_symbols=["Flask", "App"],
|
|
206
|
+
notes="App class layout",
|
|
207
|
+
),
|
|
208
|
+
BenchmarkQuery(
|
|
209
|
+
id="flask-arch-02",
|
|
210
|
+
query="What is the architecture of the Flask request and response wrappers?",
|
|
211
|
+
query_type=QueryType.ARCHITECTURE_QUERY,
|
|
212
|
+
gold_files=["src/flask/wrappers.py"],
|
|
213
|
+
gold_symbols=["Request", "Response"],
|
|
214
|
+
notes="wrapper class hierarchy",
|
|
215
|
+
),
|
|
216
|
+
BenchmarkQuery(
|
|
217
|
+
id="flask-arch-03",
|
|
218
|
+
query="Describe the class hierarchy of the Flask session interfaces",
|
|
219
|
+
query_type=QueryType.ARCHITECTURE_QUERY,
|
|
220
|
+
gold_files=["src/flask/sessions.py"],
|
|
221
|
+
gold_symbols=["SessionInterface", "SecureCookieSessionInterface", "SessionMixin"],
|
|
222
|
+
notes="session interface inheritance",
|
|
223
|
+
),
|
|
224
|
+
BenchmarkQuery(
|
|
225
|
+
id="flask-arch-04",
|
|
226
|
+
query="What is the module layout of the Flask JSON package?",
|
|
227
|
+
query_type=QueryType.ARCHITECTURE_QUERY,
|
|
228
|
+
gold_files=["src/flask/json/__init__.py", "src/flask/json/provider.py"],
|
|
229
|
+
gold_symbols=["provider", "JSONProvider"],
|
|
230
|
+
notes="json subpackage layout",
|
|
231
|
+
),
|
|
232
|
+
BenchmarkQuery(
|
|
233
|
+
id="flask-arch-05",
|
|
234
|
+
query="Show me the directory structure of the Flask templating subsystem",
|
|
235
|
+
query_type=QueryType.ARCHITECTURE_QUERY,
|
|
236
|
+
gold_files=["src/flask/templating.py", "src/flask/app.py"],
|
|
237
|
+
gold_symbols=["Environment", "DispatchingJinjaLoader", "render_template"],
|
|
238
|
+
notes="templating subsystem structure",
|
|
239
|
+
),
|
|
240
|
+
|
|
241
|
+
# ----------------------------------------------------------------- #
|
|
242
|
+
# BUG_DIAGNOSIS (4)
|
|
243
|
+
# ----------------------------------------------------------------- #
|
|
244
|
+
BenchmarkQuery(
|
|
245
|
+
id="flask-bug-01",
|
|
246
|
+
query="AttributeError: 'Flask' object has no attribute 'before_first_request' on Flask 3.x",
|
|
247
|
+
query_type=QueryType.BUG_DIAGNOSIS,
|
|
248
|
+
gold_files=["src/flask/app.py"],
|
|
249
|
+
gold_symbols=["before_first_request"],
|
|
250
|
+
notes="removed in Flask 3.x; users must use before_request",
|
|
251
|
+
),
|
|
252
|
+
BenchmarkQuery(
|
|
253
|
+
id="flask-bug-02",
|
|
254
|
+
query="RuntimeError: Working outside of application context when accessing current_app",
|
|
255
|
+
query_type=QueryType.BUG_DIAGNOSIS,
|
|
256
|
+
gold_files=["src/flask/ctx.py", "src/flask/globals.py"],
|
|
257
|
+
gold_symbols=["AppContext", "current_app", "push"],
|
|
258
|
+
notes="context not pushed",
|
|
259
|
+
),
|
|
260
|
+
BenchmarkQuery(
|
|
261
|
+
id="flask-bug-03",
|
|
262
|
+
query="ImportError: cannot import name 'flask' from 'flask' — circular import",
|
|
263
|
+
query_type=QueryType.BUG_DIAGNOSIS,
|
|
264
|
+
gold_files=["src/flask/__init__.py"],
|
|
265
|
+
gold_symbols=[],
|
|
266
|
+
notes="circular import in __init__",
|
|
267
|
+
),
|
|
268
|
+
BenchmarkQuery(
|
|
269
|
+
id="flask-bug-04",
|
|
270
|
+
query="TypeError: __init__() got an unexpected keyword argument 'subdomain_matching' when creating Flask app",
|
|
271
|
+
query_type=QueryType.BUG_DIAGNOSIS,
|
|
272
|
+
gold_files=["src/flask/app.py", "src/flask/sansio/app.py"],
|
|
273
|
+
gold_symbols=["Flask", "__init__"],
|
|
274
|
+
notes="argument removed/renamed across versions",
|
|
275
|
+
),
|
|
276
|
+
|
|
277
|
+
# ----------------------------------------------------------------- #
|
|
278
|
+
# CODE_COMPLETION (4)
|
|
279
|
+
# ----------------------------------------------------------------- #
|
|
280
|
+
BenchmarkQuery(
|
|
281
|
+
id="flask-cc-01",
|
|
282
|
+
query="Complete a Flask route that returns JSON for a GET /api/users endpoint",
|
|
283
|
+
query_type=QueryType.CODE_COMPLETION,
|
|
284
|
+
gold_files=["src/flask/app.py", "src/flask/json/__init__.py"],
|
|
285
|
+
gold_symbols=["route", "jsonify"],
|
|
286
|
+
notes="route + jsonify",
|
|
287
|
+
),
|
|
288
|
+
BenchmarkQuery(
|
|
289
|
+
id="flask-cc-02",
|
|
290
|
+
query="Implement a Flask before_request hook that loads the current user from a session cookie",
|
|
291
|
+
query_type=QueryType.CODE_COMPLETION,
|
|
292
|
+
gold_files=["src/flask/app.py", "src/flask/ctx.py", "src/flask/sessions.py"],
|
|
293
|
+
gold_symbols=["before_request", "session"],
|
|
294
|
+
notes="auth hook",
|
|
295
|
+
),
|
|
296
|
+
BenchmarkQuery(
|
|
297
|
+
id="flask-cc-03",
|
|
298
|
+
query="Write a Flask blueprint that registers a JSON error handler for 404",
|
|
299
|
+
query_type=QueryType.CODE_COMPLETION,
|
|
300
|
+
gold_files=["src/flask/blueprints.py", "src/flask/sansio/blueprints.py"],
|
|
301
|
+
gold_symbols=["Blueprint", "register_error_handler", "jsonify"],
|
|
302
|
+
notes="blueprint error handler",
|
|
303
|
+
),
|
|
304
|
+
BenchmarkQuery(
|
|
305
|
+
id="flask-cc-04",
|
|
306
|
+
query="Complete a Flask CLI command that initialises the database",
|
|
307
|
+
query_type=QueryType.CODE_COMPLETION,
|
|
308
|
+
gold_files=["src/flask/cli.py", "src/flask/app.py"],
|
|
309
|
+
gold_symbols=["AppGroup", "command", "with_appcontext"],
|
|
310
|
+
notes="CLI command with appcontext",
|
|
311
|
+
),
|
|
312
|
+
]
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# --------------------------------------------------------------------------- #
|
|
316
|
+
# Convenience accessors
|
|
317
|
+
# --------------------------------------------------------------------------- #
|
|
318
|
+
def get_benchmark() -> list[BenchmarkQuery]:
    """Return a shallow copy of ``FLASK_BENCHMARK``.

    The copy lets callers reorder or trim the returned list without
    touching the module-level benchmark; the query objects are shared.
    """
    return FLASK_BENCHMARK[:]
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def get_queries_by_type(query_type: QueryType) -> list[BenchmarkQuery]:
    """Return the benchmark queries whose ``query_type`` equals *query_type*.

    Benchmark order is preserved; the result is empty when nothing matches.
    """
    matches: list[BenchmarkQuery] = []
    for entry in FLASK_BENCHMARK:
        if entry.query_type == query_type:
            matches.append(entry)
    return matches
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def benchmark_summary() -> dict[str, int]:
    """Count the benchmark queries per query type.

    Returns:
        A dict keyed by ``query_type.value`` with the number of queries of
        that type, plus a ``"total"`` key holding the overall query count.
    """
    counts: dict[str, int] = {}
    for entry in FLASK_BENCHMARK:
        label = entry.query_type.value
        if label in counts:
            counts[label] += 1
        else:
            counts[label] = 1
    counts["total"] = len(FLASK_BENCHMARK)
    return counts
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Django benchmark dataset — 30 queries with verified gold files."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
@dataclass
class BenchmarkQuery:
    """One Django benchmark query with its expected files and keywords."""

    # Short query identifier, e.g. "d01".
    qid: str
    # Query text handed to the retriever.
    query: str
    # Query category as a plain string, e.g. "identifier_lookup".
    query_type: str
    # Repository-relative paths recorded as relevant for this query.
    relevant_files: list[str]
    # Keywords associated with the query (may be empty).
    relevant_keywords: list[str] = field(default_factory=list)
|
|
8
|
+
|
|
9
|
+
DATASET = [
|
|
10
|
+
BenchmarkQuery("d01","QuerySet","identifier_lookup",["django/db/models/query.py"],["QuerySet"]),
|
|
11
|
+
BenchmarkQuery("d02","ModelBase","identifier_lookup",["django/db/models/base.py"],["ModelBase","Model"]),
|
|
12
|
+
BenchmarkQuery("d03","HttpRequest","identifier_lookup",["django/http/request.py"],["HttpRequest"]),
|
|
13
|
+
BenchmarkQuery("d04","WSGIHandler","identifier_lookup",["django/core/handlers/wsgi.py"],["WSGIHandler"]),
|
|
14
|
+
BenchmarkQuery("d05","BaseForm","identifier_lookup",["django/forms/forms.py"],["BaseForm","Form"]),
|
|
15
|
+
BenchmarkQuery("d06","URLResolver","identifier_lookup",["django/urls/resolvers.py"],["URLResolver","URLPattern"]),
|
|
16
|
+
BenchmarkQuery("d07","how does the ORM QuerySet work","semantic_question",["django/db/models/query.py"],["QuerySet","filter","iterator"]),
|
|
17
|
+
BenchmarkQuery("d08","how does the URL resolver match patterns","semantic_question",["django/urls/resolvers.py"],["URLResolver","resolve","match"]),
|
|
18
|
+
BenchmarkQuery("d09","how does Django process WSGI requests","semantic_question",["django/core/handlers/wsgi.py"],["WSGIHandler","WSGIRequest"]),
|
|
19
|
+
BenchmarkQuery("d10","how does the template engine parse and render","semantic_question",["django/template/base.py"],["Template","Token","Lexer"]),
|
|
20
|
+
BenchmarkQuery("d11","how does Django handle CSRF protection","semantic_question",["django/middleware/csrf.py"],["CSRF","token","middleware"]),
|
|
21
|
+
BenchmarkQuery("d12","how does the password hashing work","semantic_question",["django/contrib/auth/hashers.py"],["check_password","make_password","hasher"]),
|
|
22
|
+
BenchmarkQuery("d13","how does Django send email","semantic_question",["django/core/mail/__init__.py"],["send_mail","EmailMessage"]),
|
|
23
|
+
BenchmarkQuery("d14","how do database transactions work","semantic_question",["django/db/transaction.py"],["atomic","commit","rollback"]),
|
|
24
|
+
BenchmarkQuery("d15","how does form validation work","semantic_question",["django/forms/forms.py"],["BaseForm","is_valid","clean"]),
|
|
25
|
+
BenchmarkQuery("d16","how does the session backend store data","semantic_question",["django/contrib/sessions/backends/db.py"],["SessionStore","SessionBase"]),
|
|
26
|
+
BenchmarkQuery("d17","how does Django load templates","semantic_question",["django/template/loader.py"],["get_template","render_to_string"]),
|
|
27
|
+
BenchmarkQuery("d18","what is the structure of the auth models","architecture_query",["django/contrib/auth/models.py"],["User","Group","Permission"]),
|
|
28
|
+
BenchmarkQuery("d19","what are the HTTP response classes","architecture_query",["django/http/response.py"],["HttpResponse","JsonResponse"]),
|
|
29
|
+
BenchmarkQuery("d20","what validators does Django provide","architecture_query",["django/core/validators.py"],["RegexValidator","URLValidator"]),
|
|
30
|
+
BenchmarkQuery("d21","what field types does the ORM support","architecture_query",["django/db/models/fields/__init__.py"],["CharField","IntegerField"]),
|
|
31
|
+
BenchmarkQuery("d22","what is the management command structure","architecture_query",["django/core/management/base.py"],["BaseCommand","CommandError"]),
|
|
32
|
+
BenchmarkQuery("d23","TransactionManagementError atomic block","bug_diagnosis",["django/db/transaction.py"],["TransactionManagementError"]),
|
|
33
|
+
BenchmarkQuery("d24","VariableDoesNotExist template rendering error","bug_diagnosis",["django/template/base.py"],["VariableDoesNotExist"]),
|
|
34
|
+
BenchmarkQuery("d25","BadHeaderError HttpResponse header injection","bug_diagnosis",["django/http/response.py"],["BadHeaderError"]),
|
|
35
|
+
BenchmarkQuery("d26","CommandError management command failure","bug_diagnosis",["django/core/management/base.py"],["CommandError","CommandParser"]),
|
|
36
|
+
BenchmarkQuery("d27","models.ForeignKey(","code_completion",["django/db/models/fields/related.py"],["ForeignKey"]),
|
|
37
|
+
BenchmarkQuery("d28","forms.ModelForm(","code_completion",["django/forms/models.py"],["ModelForm"]),
|
|
38
|
+
BenchmarkQuery("d29","transaction.atomic(","code_completion",["django/db/transaction.py"],["atomic"]),
|
|
39
|
+
BenchmarkQuery("d30","send_mail(","code_completion",["django/core/mail/__init__.py"],["send_mail"]),
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
def load_dataset():
    """Return a new list containing the entries of ``DATASET``, in order."""
    return [*DATASET]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Flask benchmark dataset — 30 queries."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
@dataclass
class BenchmarkQuery:
    """One Flask benchmark query with its expected files and keywords."""

    # Short query identifier, e.g. "f01".
    qid: str
    # Query text handed to the retriever.
    query: str
    # Query category as a plain string, e.g. "semantic_question".
    query_type: str
    # Repository-relative paths recorded as relevant for this query.
    relevant_files: list[str]
    # Keywords associated with the query (may be empty).
    relevant_keywords: list[str] = field(default_factory=list)
|
|
8
|
+
|
|
9
|
+
DATASET = [
|
|
10
|
+
BenchmarkQuery("f01","Flask","identifier_lookup",["src/flask/app.py","src/flask/__init__.py"],["Flask"]),
|
|
11
|
+
BenchmarkQuery("f02","Blueprint","identifier_lookup",["src/flask/blueprints.py","src/flask/sansio/blueprints.py"],["Blueprint"]),
|
|
12
|
+
BenchmarkQuery("f03","request","identifier_lookup",["src/flask/globals.py","src/flask/wrappers.py"],["request"]),
|
|
13
|
+
BenchmarkQuery("f04","session","identifier_lookup",["src/flask/sessions.py","src/flask/globals.py"],["session"]),
|
|
14
|
+
BenchmarkQuery("f05","current_app","identifier_lookup",["src/flask/globals.py","src/flask/ctx.py"],["current_app"]),
|
|
15
|
+
BenchmarkQuery("f06","g","identifier_lookup",["src/flask/globals.py","src/flask/ctx.py"],["g"]),
|
|
16
|
+
BenchmarkQuery("f07","how does routing work in flask","semantic_question",["src/flask/sansio/scaffold.py","src/flask/app.py"],["route","url_map"]),
|
|
17
|
+
BenchmarkQuery("f08","how does the request context get pushed and popped","semantic_question",["src/flask/ctx.py"],["RequestContext","push","pop"]),
|
|
18
|
+
BenchmarkQuery("f09","how are templates rendered","semantic_question",["src/flask/templating.py","src/flask/helpers.py"],["render_template"]),
|
|
19
|
+
BenchmarkQuery("f10","how does the session cookie get signed and validated","semantic_question",["src/flask/sessions.py"],["SecureCookieSessionInterface","secret_key"]),
|
|
20
|
+
BenchmarkQuery("f11","how are url converters and rules defined","semantic_question",["src/flask/sansio/scaffold.py"],["url_rule","converter","url_map"]),
|
|
21
|
+
BenchmarkQuery("f12","how does flask handle json serialization and deserialization","semantic_question",["src/flask/json/__init__.py","src/flask/json/provider.py"],["JSONProvider","jsonify"]),
|
|
22
|
+
BenchmarkQuery("f13","how does the cli command registration work","semantic_question",["src/flask/cli.py"],["FlaskGroup","AppGroup","command"]),
|
|
23
|
+
BenchmarkQuery("f14","how are errors and http exceptions handled","semantic_question",["src/flask/app.py","src/flask/helpers.py"],["errorhandler","HTTPException","abort"]),
|
|
24
|
+
BenchmarkQuery("f15","how does flask read configuration from environment and files","semantic_question",["src/flask/config.py"],["Config","from_object","from_envvar"]),
|
|
25
|
+
BenchmarkQuery("f16","how do before and after request hooks work","semantic_question",["src/flask/sansio/scaffold.py","src/flask/app.py"],["before_request","after_request"]),
|
|
26
|
+
BenchmarkQuery("f17","how does the test client make requests","semantic_question",["src/flask/testing.py"],["FlaskClient","test_client"]),
|
|
27
|
+
BenchmarkQuery("f18","what is the overall architecture of the flask package","architecture_query",["src/flask/__init__.py","src/flask/app.py"],[]),
|
|
28
|
+
BenchmarkQuery("f19","what are the main building blocks of flask sansio modules","architecture_query",["src/flask/sansio/scaffold.py","src/flask/sansio/app.py","src/flask/sansio/blueprints.py"],[]),
|
|
29
|
+
BenchmarkQuery("f20","Request Response Response body wrappers","architecture_query",["src/flask/wrappers.py"],["Request","Response"]),
|
|
30
|
+
BenchmarkQuery("f21","what classes are in the json module","architecture_query",["src/flask/json/__init__.py","src/flask/json/provider.py"],["JSONProvider"]),
|
|
31
|
+
BenchmarkQuery("f22","AppContext RequestContext context hierarchy","architecture_query",["src/flask/ctx.py"],["AppContext","RequestContext"]),
|
|
32
|
+
BenchmarkQuery("f23","BadRequestKeyError 400 bad request key not found","bug_diagnosis",["src/flask/debughelpers.py","src/flask/sansio/scaffold.py"],["BadRequestKeyError"]),
|
|
33
|
+
BenchmarkQuery("f24","DebugFilesPayload files missing error","bug_diagnosis",["src/flask/debughelpers.py"],["DebugFilesPayload"]),
|
|
34
|
+
BenchmarkQuery("f25","AssertionError view function endpoint mapping required","bug_diagnosis",["src/flask/sansio/scaffold.py","src/flask/debughelpers.py"],["AssertionError","endpoint"]),
|
|
35
|
+
BenchmarkQuery("f26","RuntimeError working outside of application context","bug_diagnosis",["src/flask/ctx.py","src/flask/globals.py"],["RuntimeError","application context"]),
|
|
36
|
+
BenchmarkQuery("f27","app.route(","code_completion",["src/flask/sansio/scaffold.py","src/flask/app.py"],["route"]),
|
|
37
|
+
BenchmarkQuery("f28","render_template(","code_completion",["src/flask/templating.py"],["render_template"]),
|
|
38
|
+
BenchmarkQuery("f29","app.config.from_object(","code_completion",["src/flask/config.py"],["Config","from_object"]),
|
|
39
|
+
BenchmarkQuery("f30","session.permanent(","code_completion",["src/flask/sessions.py"],["SessionMixin","permanent"]),
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
def load_dataset():
    """Return a new list containing the entries of ``DATASET``, in order."""
    return [*DATASET]
|
kce/benchmark/metrics.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""IR metrics: Recall@k, MRR, nDCG, Context F1."""
|
|
2
|
+
import math
|
|
3
|
+
|
|
4
|
+
def recall_at_k(retrieved, relevant, k):
    """Return the fraction of distinct relevant items found in the top *k*.

    Args:
        retrieved: Ranked sequence of retrieved items, best first.
        relevant: Iterable of gold items; duplicates are counted once.
        k: Cut-off rank; only ``retrieved[:k]`` is inspected.

    Returns:
        A float in ``[0.0, 1.0]``; ``0.0`` when there are no relevant items.
    """
    # De-duplicate the gold set so a repeated gold entry cannot inflate the
    # denominator and make a perfect recall of 1.0 unreachable.
    gold = set(relevant)
    if not gold:
        return 0.0
    return len(gold.intersection(retrieved[:k])) / len(gold)
|
|
7
|
+
|
|
8
|
+
def mrr(retrieved, relevant):
    """Return the reciprocal rank of the first relevant item in *retrieved*.

    Args:
        retrieved: Ranked iterable of retrieved items, best first.
        relevant: Iterable of gold items.

    Returns:
        ``1.0 / rank`` (1-based) of the first hit, or ``0.0`` when no
        retrieved item is relevant.
    """
    # Build the lookup set once: rebuilding it per item is wasted work and
    # silently empties out when ``relevant`` is a one-shot iterator.
    gold = set(relevant)
    for rank, item in enumerate(retrieved, 1):
        if item in gold:
            return 1.0 / rank
    return 0.0
|
|
12
|
+
|
|
13
|
+
def ndcg_at_k(retrieved, relevant, k):
    """Return binary-relevance nDCG over the top *k* retrieved items.

    Args:
        retrieved: Ranked sequence of retrieved items, best first.
        relevant: Iterable of gold items; duplicates are counted once.
        k: Cut-off rank; only ``retrieved[:k]`` is inspected.

    Returns:
        DCG divided by the ideal DCG, in ``[0.0, 1.0]``; ``0.0`` when there
        are no relevant items or the ideal DCG is zero.
    """
    gold = set(relevant)
    credited = set()
    dcg = 0.0
    for rank, item in enumerate(retrieved[:k], 1):
        # Credit each gold item only at its first occurrence; otherwise a
        # ranking that repeats one relevant item scores above the ideal.
        if item in gold and item not in credited:
            credited.add(item)
            dcg += 1.0 / math.log2(rank + 1)
    # The ideal ranking places every distinct gold item first, capped at k.
    ideal_hits = min(len(gold), k)
    idcg = sum(1.0 / math.log2(rank + 1) for rank in range(1, ideal_hits + 1))
    return dcg / idcg if idcg > 0 else 0.0
|
|
20
|
+
|
|
21
|
+
def context_f1(retrieved, gold, k=10):
    """Return set-based recall, precision and F1 for the top *k* items.

    Args:
        retrieved: Ranked sequence of retrieved items, best first.
        gold: Iterable of gold items.
        k: Cut-off rank; only ``retrieved[:k]`` is inspected.

    Returns:
        A dict with ``"recall"``, ``"precision"`` and ``"f1"`` floats. All
        three are ``0.0`` when *gold* is empty.
    """
    gold_set = set(gold)
    top_set = set(retrieved[:k])
    if len(gold_set) == 0:
        return {"recall": 0.0, "precision": 0.0, "f1": 0.0}

    hits = len(gold_set & top_set)
    recall = hits / len(gold_set)
    if top_set:
        precision = hits / len(top_set)
    else:
        precision = 0.0

    # Guard the harmonic mean against a zero denominator.
    if recall + precision > 0:
        f1 = 2 * recall * precision / (recall + precision)
    else:
        f1 = 0.0
    return {"recall": recall, "precision": precision, "f1": f1}
|