ragtime-cli 0.2.13-py3-none-any.whl → 0.2.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/METADATA +1 -1
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/RECORD +11 -10
- src/cli.py +14 -9
- src/db.py +194 -4
- src/feedback.py +202 -0
- src/indexers/docs.py +197 -19
- src/mcp_server.py +133 -14
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/WHEEL +0 -0
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/entry_points.txt +0 -0
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/licenses/LICENSE +0 -0
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/top_level.txt +0 -0
{ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragtime-cli
-Version: 0.2.13
+Version: 0.2.15
 Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
 Author-email: Bret Martineau <bretwardjames@gmail.com>
 License-Expression: MIT
{ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/RECORD
CHANGED

@@ -1,9 +1,10 @@
-ragtime_cli-0.2.13.dist-info/…
+ragtime_cli-0.2.15.dist-info/licenses/LICENSE,sha256=9A0wJs2PRDciGRH4F8JUJ-aMKYQyq_gVu2ixrXs-l5A,1070
 src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-src/cli.py,sha256=…
+src/cli.py,sha256=RmH3M9NvZcIO4sjYgrEJJrD-mn2mcK4dPyqaBxrhdeU,76984
 src/config.py,sha256=tQ6gPLr4ksn2bJPIUjtELFr-k01Eg4g-LDo3GNE6P0Q,4600
-src/db.py,sha256=…
-src/…
+src/db.py,sha256=eWqFGrg3O6hve67EzRJGcAsIpYxWJo4JlrAtlZUUA_s,15169
+src/feedback.py,sha256=cPw_lzusZZPvkgUxs_eV67NtV1FoCfTXUulBPnD78lo,6455
+src/mcp_server.py,sha256=QHU8jtPdA-kEzoXj88ZM0XhFvwhIngKD8Ow4plvHBfM,26498
 src/memory.py,sha256=UiHyudKbseMMY-sdcaDSfVBMGj6sFXXw1GxBsZ7nuBc,18450
 src/commands/audit.md,sha256=Xkucm-gfBIMalK9wf7NBbyejpsqBTUAGGlb7GxMtMPY,5137
 src/commands/create-pr.md,sha256=u6-jVkDP_6bJQp6ImK039eY9F6B9E2KlAVlvLY-WV6Q,9483

@@ -17,9 +18,9 @@ src/commands/save.md,sha256=7gTpW46AU9Y4l8XVZ8f4h1sEdBfVqIRA7hlidUxMAC4,251
 src/commands/start.md,sha256=qoqhkMgET74DBx8YPIT1-wqCiVBUDxlmevigsCinHSY,6506
 src/indexers/__init__.py,sha256=MYoCPZUpHakMX1s2vWnc9shjWfx_X1_0JzUhpKhnKUQ,454
 src/indexers/code.py,sha256=G2TbiKbWj0e7DV5KsU8-Ggw6ziDb4zTuZ4Bu3ryV4g8,18059
-src/indexers/docs.py,sha256=…
-ragtime_cli-0.2.13.dist-info/…
-ragtime_cli-0.2.13.dist-info/…
-ragtime_cli-0.2.13.dist-info/…
-ragtime_cli-0.2.13.dist-info/…
-ragtime_cli-0.2.13.dist-info/…
+src/indexers/docs.py,sha256=Q8krHYw0bybUyZaq1sJ0r6Fv-I_6BjTufhqI1eg_25s,9992
+ragtime_cli-0.2.15.dist-info/METADATA,sha256=J0tETjffr7XYMo3VmUwtm6SqUUnsuPVkrNpw5VYcgd8,11269
+ragtime_cli-0.2.15.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ragtime_cli-0.2.15.dist-info/entry_points.txt,sha256=cWLbeyMxZNbew-THS3bHXTpCRXt1EaUy5QUOXGXLjl4,75
+ragtime_cli-0.2.15.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
+ragtime_cli-0.2.15.dist-info/RECORD,,
src/cli.py
CHANGED

@@ -381,13 +381,13 @@ def index(path: Path, index_type: str, clear: bool):
             item_show_func=lambda f: f.name[:30] if f else "",
         ) as files:
             for file_path in files:
-…
+                # index_doc_file returns list (hierarchical chunks)
+                file_entries = index_doc_file(file_path)
+                entries.extend(file_entries)

         if entries:
             _upsert_entries(db, entries, "docs")
-            click.echo(f"  Indexed {len(entries)} …
+            click.echo(f"  Indexed {len(entries)} document chunks")
         elif not to_delete:
             click.echo("  All docs up to date")
         else:

@@ -470,17 +470,21 @@ def index(path: Path, index_type: str, clear: bool):
 @click.option("--type", "type_filter", type=click.Choice(["all", "docs", "code"]), default="all")
 @click.option("--namespace", "-n", help="Filter by namespace")
 @click.option("--require", "-r", "require_terms", multiple=True,
-              help="…
+              help="Additional terms that MUST appear (usually auto-detected)")
+@click.option("--raw", is_flag=True, help="Disable auto-detection of qualifiers")
 @click.option("--include-archive", is_flag=True, help="Also search archived branches")
 @click.option("--limit", "-l", default=5, help="Max results")
 @click.option("--verbose", "-v", is_flag=True, help="Show full content")
 def search(query: str, path: Path, type_filter: str, namespace: str,
-           require_terms: tuple, include_archive: bool, limit: int, verbose: bool):
+           require_terms: tuple, raw: bool, include_archive: bool, limit: int, verbose: bool):
     """
-…
+    Smart search: auto-detects qualifiers like 'mobile', 'auth', 'dart'.

-…
+    \b
+    Examples:
+        ragtime search "error handling in mobile"   # auto-requires 'mobile'
+        ragtime search "auth flow"                  # auto-requires 'auth'
+        ragtime search "useAsyncState" --raw        # literal search, no extraction
     """
     path = Path(path).resolve()
     db = get_db(path)

@@ -493,6 +497,7 @@ def search(query: str, path: Path, type_filter: str, namespace: str,
         type_filter=type_arg,
         namespace=namespace,
         require_terms=list(require_terms) if require_terms else None,
+        auto_extract=not raw,
     )

     if not results:
src/db.py
CHANGED

@@ -4,12 +4,74 @@ ChromaDB wrapper for ragtime.
 Handles storage and retrieval of indexed documents and code.
 """

+import re
 from pathlib import Path
 from typing import Any
 import chromadb
 from chromadb.config import Settings


+def extract_query_hints(query: str, known_components: list[str] | None = None) -> tuple[str, list[str]]:
+    """
+    Extract component/scope hints from a query for hybrid search.
+
+    Detects patterns like "X in mobile", "mobile X", "X for auth" and extracts
+    the qualifier to use as require_terms. This prevents qualifiers from being
+    diluted in semantic search.
+
+    Args:
+        query: The natural language search query
+        known_components: Optional list of known component names to detect
+
+    Returns:
+        (cleaned_query, extracted_terms) - query with hints removed, terms to require
+    """
+    # Default known components/scopes (common patterns)
+    default_components = [
+        # Platforms
+        "mobile", "web", "desktop", "ios", "android", "flutter", "react", "vue",
+        # Languages
+        "dart", "python", "typescript", "javascript", "ts", "js", "py",
+        # Common components
+        "auth", "authentication", "api", "database", "db", "ui", "frontend", "backend",
+        "server", "client", "admin", "user", "payment", "billing", "notification",
+        "email", "cache", "queue", "worker", "scheduler", "logging", "metrics",
+    ]
+
+    components = set(c.lower() for c in (known_components or default_components))
+    extracted = []
+    cleaned = query
+
+    # Pattern 1: "X in/for/on {component}" - extract component
+    patterns = [
+        r'\b(?:in|for|on|from|using|with)\s+(?:the\s+)?(\w+)\s*(?:app|code|module|service|codebase)?(?:\s|$)',
+        r'\b(\w+)\s+(?:app|code|module|service|codebase)\b',
+    ]
+
+    for pattern in patterns:
+        for match in re.finditer(pattern, query, re.IGNORECASE):
+            word = match.group(1).lower()
+            if word in components:
+                extracted.append(word)
+                # Remove the matched phrase from query
+                cleaned = cleaned[:match.start()] + " " + cleaned[match.end():]
+
+    # Pattern 2: Check if any known component appears as standalone word
+    words = re.findall(r'\b\w+\b', query.lower())
+    for word in words:
+        if word in components and word not in extracted:
+            # Only extract if it looks like a qualifier (not the main subject)
+            # Heuristic: if query has other meaningful words, it's likely a qualifier
+            other_words = [w for w in words if w != word and len(w) > 3]
+            if len(other_words) >= 2:
+                extracted.append(word)
+
+    # Clean up extra whitespace
+    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
+
+    return cleaned, list(set(extracted))
+
+
 class RagtimeDB:
     """Vector database for ragtime indexes."""

@@ -85,6 +147,7 @@ class RagtimeDB:
         type_filter: str | None = None,
         namespace: str | None = None,
         require_terms: list[str] | None = None,
+        auto_extract: bool = True,
         **filters,
     ) -> list[dict]:
         """

@@ -98,11 +161,26 @@ class RagtimeDB:
             require_terms: List of terms that MUST appear in results (case-insensitive).
                            Use for scoped queries like "error handling in mobile" with
                            require_terms=["mobile"] to ensure "mobile" isn't ignored.
+            auto_extract: If True (default), automatically detect component qualifiers
+                          in the query and add them to require_terms. Set to False
+                          for raw/literal search.
             **filters: Additional metadata filters (None values are ignored)

         Returns:
             List of dicts with 'content', 'metadata', 'distance'
         """
+        # Auto-extract component hints from query if enabled
+        search_query = query
+        all_require_terms = list(require_terms) if require_terms else []
+
+        if auto_extract:
+            cleaned_query, extracted = extract_query_hints(query)
+            if extracted:
+                # Use cleaned query for embedding (removes noise)
+                search_query = cleaned_query
+                # Add extracted terms to require_terms
+                all_require_terms.extend(extracted)
+                all_require_terms = list(set(all_require_terms))  # dedupe
         # Build list of filter conditions, excluding None values
         conditions = []

@@ -126,10 +204,10 @@ class RagtimeDB:
             where = {"$and": conditions}

         # When using require_terms, fetch more results since we'll filter some out
-        fetch_limit = limit * 5 if …
+        fetch_limit = limit * 5 if all_require_terms else limit

         results = self.collection.query(
-            query_texts=[…
+            query_texts=[search_query],
             n_results=fetch_limit,
             where=where,
         )

@@ -139,13 +217,13 @@ class RagtimeDB:
         if results["documents"] and results["documents"][0]:
             for i, doc in enumerate(results["documents"][0]):
                 # Hybrid filtering: ensure required terms appear
-                if …
+                if all_require_terms:
                     doc_lower = doc.lower()
                     # Also check file path in metadata for code/file matches
                     file_path = (results["metadatas"][0][i].get("file", "") or "").lower()
                     combined_text = f"{doc_lower} {file_path}"

-                    if not all(term.lower() in combined_text for term in …
+                    if not all(term.lower() in combined_text for term in all_require_terms):
                         continue

                 output.append({

@@ -160,6 +238,118 @@ class RagtimeDB:
         return output

+    def search_tiered(
+        self,
+        query: str,
+        limit: int = 10,
+        namespace: str | None = None,
+        require_terms: list[str] | None = None,
+        auto_extract: bool = True,
+        **filters,
+    ) -> list[dict]:
+        """
+        Tiered search: prioritizes memories > docs > code.
+
+        Searches in priority order, filling up to limit:
+        1. Memories (curated, high-signal knowledge)
+        2. Documentation (indexed markdown)
+        3. Code (broadest, implementation details)
+
+        Args:
+            query: Natural language search query
+            limit: Max total results to return
+            namespace: Filter by namespace
+            require_terms: Terms that MUST appear in results
+            auto_extract: Auto-detect qualifiers from query
+            **filters: Additional metadata filters
+
+        Returns:
+            List of dicts with 'content', 'metadata', 'distance', 'tier'
+        """
+        results = []
+
+        # Tier 1: Memories (not docs or code)
+        memory_results = self._search_tier(
+            query=query,
+            tier_name="memory",
+            exclude_types=["docs", "code"],
+            limit=limit,
+            namespace=namespace,
+            require_terms=require_terms,
+            auto_extract=auto_extract,
+            **filters,
+        )
+        results.extend(memory_results)
+
+        # Tier 2: Documentation
+        if len(results) < limit:
+            doc_results = self._search_tier(
+                query=query,
+                tier_name="docs",
+                type_filter="docs",
+                limit=limit - len(results),
+                namespace=namespace,
+                require_terms=require_terms,
+                auto_extract=auto_extract,
+                **filters,
+            )
+            results.extend(doc_results)
+
+        # Tier 3: Code
+        if len(results) < limit:
+            code_results = self._search_tier(
+                query=query,
+                tier_name="code",
+                type_filter="code",
+                limit=limit - len(results),
+                namespace=namespace,
+                require_terms=require_terms,
+                auto_extract=auto_extract,
+                **filters,
+            )
+            results.extend(code_results)
+
+        return results
+
+    def _search_tier(
+        self,
+        query: str,
+        tier_name: str,
+        limit: int,
+        type_filter: str | None = None,
+        exclude_types: list[str] | None = None,
+        **kwargs,
+    ) -> list[dict]:
+        """Search a single tier and tag results."""
+        # Build where clause for exclusion if needed
+        if exclude_types:
+            # Search without type filter, then exclude in post-processing
+            results = self.search(
+                query=query,
+                limit=limit * 2,  # fetch more since we'll filter
+                type_filter=None,
+                **kwargs,
+            )
+            # Filter out excluded types
+            filtered = []
+            for r in results:
+                if r["metadata"].get("type") not in exclude_types:
+                    r["tier"] = tier_name
+                    filtered.append(r)
+                    if len(filtered) >= limit:
+                        break
+            return filtered
+        else:
+            results = self.search(
+                query=query,
+                limit=limit,
+                type_filter=type_filter,
+                **kwargs,
+            )
+            for r in results:
+                r["tier"] = tier_name
+            return results
+
     def delete(self, ids: list[str]) -> None:
         """Delete documents by ID."""
         self.collection.delete(ids=ids)
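Taken together, the hunks above change both what gets embedded and what gets enforced: the qualifier is stripped out of the embedding text and re-applied as a hard substring filter. A minimal sketch of the behaviour, runnable against the functions shown above (the `db` handle is assumed to be an already-populated RagtimeDB instance; its construction is not part of this diff):

    from src.db import extract_query_hints

    cleaned, terms = extract_query_hints("error handling in mobile")
    # cleaned == "error handling", terms == ["mobile"]: the qualifier moves
    # out of the semantic query and into require_terms.

    cleaned, terms = extract_query_hints("useAsyncState")
    # cleaned == "useAsyncState", terms == []: unknown identifiers pass
    # through untouched, which is what the CLI --raw flag guarantees.

    # Assumed pre-existing, populated RagtimeDB instance `db`:
    # search("error handling in mobile") now behaves roughly like
    # search("error handling", require_terms=["mobile"]).
    results = db.search("error handling in mobile", limit=5)

    # Tiered search fills the limit from memories first, then docs, then code.
    for r in db.search_tiered("error handling in mobile", limit=10):
        print(r["tier"], r["metadata"].get("file"), r["distance"])

One design consequence of the fill-in-order tiers: code results only surface when memories and docs together return fewer than `limit` hits, so conceptual queries favour curated knowledge by construction.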
src/feedback.py
ADDED

@@ -0,0 +1,202 @@
+"""
+Feedback loop for RAG result quality improvement.
+
+Tracks which search results are actually used/referenced by Claude,
+enabling re-ranking and quality improvements over time.
+"""
+
+import json
+from pathlib import Path
+from dataclasses import dataclass, field, asdict
+from datetime import datetime
+from typing import Optional
+
+
+@dataclass
+class SearchFeedback:
+    """Feedback for a single search result."""
+    query: str
+    result_id: str  # ChromaDB document ID
+    result_file: str  # File path for easier debugging
+    action: str  # "used", "referenced", "ignored", "helpful", "not_helpful"
+    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+    session_id: Optional[str] = None  # Group related searches
+    position: int = 0  # Position in results (1-indexed)
+    distance: float = 0.0  # Original semantic distance
+
+
+class FeedbackStore:
+    """
+    Simple file-based feedback storage.
+
+    Stores feedback as JSON lines for easy analysis.
+    Can be upgraded to SQLite or ChromaDB later.
+    """
+
+    def __init__(self, path: Path):
+        """
+        Initialize feedback store.
+
+        Args:
+            path: Directory to store feedback data
+        """
+        self.path = path
+        self.feedback_file = path / "feedback.jsonl"
+        self.stats_file = path / "feedback_stats.json"
+        path.mkdir(parents=True, exist_ok=True)
+
+    def record(self, feedback: SearchFeedback) -> None:
+        """Record a single feedback entry."""
+        with open(self.feedback_file, "a") as f:
+            f.write(json.dumps(asdict(feedback)) + "\n")
+
+    def record_usage(
+        self,
+        query: str,
+        result_id: str,
+        result_file: str,
+        position: int = 0,
+        distance: float = 0.0,
+        session_id: Optional[str] = None,
+    ) -> None:
+        """Convenience method to record when a result is used."""
+        self.record(SearchFeedback(
+            query=query,
+            result_id=result_id,
+            result_file=result_file,
+            action="used",
+            position=position,
+            distance=distance,
+            session_id=session_id,
+        ))
+
+    def record_batch(
+        self,
+        query: str,
+        used_ids: list[str],
+        all_results: list[dict],
+        session_id: Optional[str] = None,
+    ) -> None:
+        """
+        Record feedback for a batch of results.
+
+        Marks used_ids as "used" and others as "ignored".
+        """
+        used_set = set(used_ids)
+
+        for i, result in enumerate(all_results):
+            result_id = result.get("id", "")
+            result_file = result.get("metadata", {}).get("file", "")
+            distance = result.get("distance", 0.0)
+
+            action = "used" if result_id in used_set else "ignored"
+
+            self.record(SearchFeedback(
+                query=query,
+                result_id=result_id,
+                result_file=result_file,
+                action=action,
+                position=i + 1,
+                distance=distance,
+                session_id=session_id,
+            ))
+
+    def get_usage_stats(self) -> dict:
+        """
+        Get aggregated usage statistics.
+
+        Returns:
+            Dict with usage counts, popular files, etc.
+        """
+        if not self.feedback_file.exists():
+            return {"total": 0, "used": 0, "ignored": 0}
+
+        stats = {
+            "total": 0,
+            "used": 0,
+            "ignored": 0,
+            "helpful": 0,
+            "not_helpful": 0,
+            "files_used": {},  # file -> count
+            "avg_position_used": 0.0,
+        }
+
+        positions = []
+
+        with open(self.feedback_file) as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                try:
+                    entry = json.loads(line)
+                    stats["total"] += 1
+                    action = entry.get("action", "")
+
+                    if action == "used":
+                        stats["used"] += 1
+                        positions.append(entry.get("position", 0))
+                        file_path = entry.get("result_file", "")
+                        stats["files_used"][file_path] = stats["files_used"].get(file_path, 0) + 1
+                    elif action == "ignored":
+                        stats["ignored"] += 1
+                    elif action == "helpful":
+                        stats["helpful"] += 1
+                    elif action == "not_helpful":
+                        stats["not_helpful"] += 1
+                except json.JSONDecodeError:
+                    continue
+
+        if positions:
+            stats["avg_position_used"] = sum(positions) / len(positions)
+
+        return stats
+
+    def get_boost_scores(self) -> dict[str, float]:
+        """
+        Calculate boost scores for files based on historical usage.
+
+        Returns:
+            Dict mapping file paths to boost multipliers (1.0 = no boost).
+        """
+        stats = self.get_usage_stats()
+        files_used = stats.get("files_used", {})
+
+        if not files_used:
+            return {}
+
+        # Normalize to 0-1 range, then convert to boost multiplier
+        max_count = max(files_used.values())
+        boosts = {}
+
+        for file_path, count in files_used.items():
+            # Boost range: 1.0 (no boost) to 1.5 (50% boost for most-used)
+            normalized = count / max_count
+            boosts[file_path] = 1.0 + (normalized * 0.5)
+
+        return boosts
+
+    def apply_boosts(self, results: list[dict], boosts: dict[str, float]) -> list[dict]:
+        """
+        Apply historical boost scores to search results.
+
+        Adjusts distances based on historical usage patterns.
+        Lower distance = more relevant, so we divide by boost.
+        """
+        if not boosts:
+            return results
+
+        for result in results:
+            file_path = result.get("metadata", {}).get("file", "")
+            boost = boosts.get(file_path, 1.0)
+            if "distance" in result and result["distance"]:
+                # Reduce distance for frequently-used files
+                result["distance"] = result["distance"] / boost
+                result["boosted"] = boost > 1.0
+
+        # Re-sort by adjusted distance
+        return sorted(results, key=lambda r: r.get("distance", float("inf")))
+
+    def clear(self) -> None:
+        """Clear all feedback data."""
+        if self.feedback_file.exists():
+            self.feedback_file.unlink()
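The boost arithmetic in get_boost_scores is linear in relative usage: boost = 1.0 + 0.5 * (count / max_count), so the most-used file gets 1.5 and a file used half as often gets 1.25. A short usage sketch of the store (file paths, the query, and the distance are illustrative values only):

    from pathlib import Path
    from src.feedback import FeedbackStore

    store = FeedbackStore(Path(".ragtime/feedback"))

    # Record that the top-ranked hit for a query was actually used.
    store.record_usage(
        query="error handling in mobile",
        result_id="",                        # the MCP layer matches by file path
        result_file="docs/mobile-errors.md",
        position=1,
        distance=0.42,
    )

    # With, say, 4 recorded uses of docs/mobile-errors.md and 2 of docs/auth.md:
    #   boosts == {"docs/mobile-errors.md": 1.5, "docs/auth.md": 1.25}
    boosts = store.get_boost_scores()

apply_boosts then divides each result's distance by its file's boost and re-sorts ascending, so historically useful files float upward, capped at a 1.5x distance reduction.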
src/indexers/docs.py
CHANGED

@@ -21,6 +21,10 @@ class DocEntry:
     component: str | None = None
     title: str | None = None
     mtime: float | None = None  # File modification time for incremental indexing
+    # Hierarchical chunking fields
+    section_path: str | None = None  # e.g., "Installation > Configuration > Environment Variables"
+    section_level: int = 0  # Header depth (0=whole doc, 1=h1, 2=h2, etc.)
+    chunk_index: int = 0  # Position within file (for stable IDs)

     def to_metadata(self) -> dict:
         """Convert to ChromaDB metadata dict."""

@@ -32,6 +36,8 @@ class DocEntry:
             "component": self.component or "",
             "title": self.title or Path(self.file_path).stem,
             "mtime": self.mtime or 0.0,
+            "section_path": self.section_path or "",
+            "section_level": self.section_level,
         }

@@ -56,33 +62,200 @@ def parse_frontmatter(content: str) -> tuple[dict, str]:
     return {}, content


-…
+@dataclass
+class Section:
+    """A markdown section for hierarchical chunking."""
+    title: str
+    level: int  # 1-6 for h1-h6
+    content: str
+    line_start: int
+    parent_path: list[str]  # Parent headers for context
+
+
+def chunk_by_headers(
+    content: str,
+    min_chunk_size: int = 100,
+    max_chunk_size: int = 2000,
+) -> list[Section]:
+    """
+    Split markdown into sections by headers, preserving hierarchy.
+
+    Args:
+        content: Markdown body (without frontmatter)
+        min_chunk_size: Minimum chars to make a standalone section
+        max_chunk_size: Maximum chars before splitting further
+
+    Returns:
+        List of Section objects with hierarchical context
+    """
+    lines = content.split('\n')
+    sections: list[Section] = []
+    header_stack: list[tuple[int, str]] = []  # (level, title) for building paths
+
+    current_section_lines: list[str] = []
+    current_section_start = 0
+    current_title = ""
+    current_level = 0
+
+    def flush_section():
+        """Save accumulated lines as a section."""
+        nonlocal current_section_lines, current_section_start, current_title, current_level
+
+        text = '\n'.join(current_section_lines).strip()
+        if text:
+            # Build parent path from stack (excluding current)
+            parent_path = [h[1] for h in header_stack[:-1]] if header_stack else []
+
+            sections.append(Section(
+                title=current_title or "Introduction",
+                level=current_level,
+                content=text,
+                line_start=current_section_start,
+                parent_path=parent_path,
+            ))
+        current_section_lines = []
+
+    for i, line in enumerate(lines):
+        # Detect markdown headers
+        header_match = re.match(r'^(#{1,6})\s+(.+)$', line)
+
+        if header_match:
+            # Save previous section
+            flush_section()
+
+            level = len(header_match.group(1))
+            title = header_match.group(2).strip()
+
+            # Update header stack - pop headers at same or lower level
+            while header_stack and header_stack[-1][0] >= level:
+                header_stack.pop()
+            header_stack.append((level, title))
+
+            current_title = title
+            current_level = level
+            current_section_start = i
+            current_section_lines = [line]  # Include header in content
+        else:
+            current_section_lines.append(line)
+
+    # Don't forget the last section
+    flush_section()
+
+    # Post-process: merge tiny sections into parents, split huge ones
+    processed: list[Section] = []
+    for section in sections:
+        if len(section.content) < min_chunk_size and processed:
+            # Merge into previous section
+            processed[-1].content += '\n\n' + section.content
+        elif len(section.content) > max_chunk_size:
+            # Split by paragraphs
+            paragraphs = re.split(r'\n\n+', section.content)
+            current_chunk = ""
+            chunk_num = 0
+
+            for para in paragraphs:
+                if len(current_chunk) + len(para) > max_chunk_size and current_chunk:
+                    processed.append(Section(
+                        title=f"{section.title} (part {chunk_num + 1})",
+                        level=section.level,
+                        content=current_chunk.strip(),
+                        line_start=section.line_start,
+                        parent_path=section.parent_path,
+                    ))
+                    current_chunk = para
+                    chunk_num += 1
+                else:
+                    current_chunk += '\n\n' + para if current_chunk else para
+
+            if current_chunk.strip():
+                title = f"{section.title} (part {chunk_num + 1})" if chunk_num > 0 else section.title
+                processed.append(Section(
+                    title=title,
+                    level=section.level,
+                    content=current_chunk.strip(),
+                    line_start=section.line_start,
+                    parent_path=section.parent_path,
+                ))
+        else:
+            processed.append(section)
+
+    return processed
+
+
+def index_file(file_path: Path, hierarchical: bool = True) -> list[DocEntry]:
     """
-    Parse a single markdown file into …
+    Parse a single markdown file into DocEntry objects.
+
+    Args:
+        file_path: Path to the markdown file
+        hierarchical: If True, chunk by headers for better semantic search.
+                      If False, return whole file as single entry.

-    Returns…
+    Returns:
+        List of DocEntry objects (one per section if hierarchical, else one for whole file).
+        Empty list if file can't be parsed.
     """
     try:
         content = file_path.read_text(encoding='utf-8')
         mtime = os.path.getmtime(file_path)
     except (IOError, UnicodeDecodeError, OSError):
-        return
+        return []

     metadata, body = parse_frontmatter(content)

     # Skip empty documents
     if not body.strip():
-        return
+        return []

-…
+    # Base metadata from frontmatter
+    base_namespace = metadata.get("namespace")
+    base_category = metadata.get("category")
+    base_component = metadata.get("component")
+    base_title = metadata.get("title") or file_path.stem
+
+    # Short docs: return as single entry
+    if not hierarchical or len(body) < 500:
+        return [DocEntry(
+            content=body.strip(),
+            file_path=str(file_path),
+            namespace=base_namespace,
+            category=base_category,
+            component=base_component,
+            title=base_title,
+            mtime=mtime,
+            section_path="",
+            section_level=0,
+            chunk_index=0,
+        )]
+
+    # Hierarchical chunking for longer docs
+    sections = chunk_by_headers(body)
+    entries = []
+
+    for i, section in enumerate(sections):
+        # Build full section path: "Parent > Child > Current"
+        path_parts = section.parent_path + [section.title]
+        section_path = " > ".join(path_parts)
+
+        # Prepend context for better embeddings
+        context_prefix = f"# {base_title}\n"
+        if section.parent_path:
+            context_prefix += f"Section: {' > '.join(section.parent_path)}\n\n"
+
+        entries.append(DocEntry(
+            content=context_prefix + section.content,
+            file_path=str(file_path),
+            namespace=base_namespace,
+            category=base_category,
+            component=base_component,
+            title=section.title,
+            mtime=mtime,
+            section_path=section_path,
+            section_level=section.level,
+            chunk_index=i,
+        ))
+
+    return entries


 def discover_docs(

@@ -117,18 +290,23 @@ def discover_docs(
     return files


-def index_directory(root: Path, **kwargs) -> list[DocEntry]:
+def index_directory(root: Path, hierarchical: bool = True, **kwargs) -> list[DocEntry]:
     """
     Index all markdown files in a directory.

-…
+    Args:
+        root: Directory to search
+        hierarchical: If True, chunk long docs by headers
+        **kwargs: Passed to discover_docs (patterns, exclude)
+
+    Returns:
+        List of DocEntry objects ready for vector DB.
     """
     files = discover_docs(root, **kwargs)
     entries = []

     for file_path in files:
-…
-        entries.append(entry)
+        file_entries = index_file(file_path, hierarchical=hierarchical)
+        entries.extend(file_entries)

     return entries
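The chunker is easiest to see on a tiny document. A runnable sketch against chunk_by_headers as shown above (the import path assumes the wheel's top-level `src` package; both section bodies are deliberately padded past the 100-character min_chunk_size so post-processing does not merge them away):

    from src.indexers.docs import chunk_by_headers

    md = """# Guide
    An introduction long enough to survive post-processing: anything under the
    100-character min_chunk_size default is merged into the previous section.

    ## Install
    Run `pipx install ragtime-cli`. This body is also padded well past the
    minimum so it stays a standalone chunk after post-processing.
    """

    for s in chunk_by_headers(md):
        print(s.level, " > ".join(s.parent_path + [s.title]))
    # 1 Guide
    # 2 Guide > Install

index_file then prepends the document title (and the parent-section trail) to each chunk before embedding, so a hit on the Install section still carries its Guide context.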
src/mcp_server.py
CHANGED

@@ -13,6 +13,7 @@ from typing import Any

 from .db import RagtimeDB
 from .memory import Memory, MemoryStore
+from .feedback import FeedbackStore, SearchFeedback


 class RagtimeMCPServer:

@@ -28,6 +29,7 @@ class RagtimeMCPServer:
         self.project_path = project_path or Path.cwd()
         self._db = None
         self._store = None
+        self._feedback = None

     @property
     def db(self) -> RagtimeDB:

@@ -44,6 +46,14 @@ class RagtimeMCPServer:
             self._store = MemoryStore(self.project_path, self.db)
         return self._store

+    @property
+    def feedback(self) -> FeedbackStore:
+        """Lazy-load the feedback store."""
+        if self._feedback is None:
+            feedback_path = self.project_path / ".ragtime" / "feedback"
+            self._feedback = FeedbackStore(feedback_path)
+        return self._feedback
+
     def get_author(self) -> str:
         """Get the current developer's username."""
         try:

@@ -132,13 +142,18 @@ class RagtimeMCPServer:
             },
             {
                 "name": "search",
-                "description": "…
+                "description": "Smart hybrid search over indexed content. Auto-detects qualifiers like 'mobile', 'auth', 'dart' and ensures they appear in results. Use tiered=true for priority ordering (memories > docs > code). Returns summaries with file paths - use Read tool for full implementations.",
                 "inputSchema": {
                     "type": "object",
                     "properties": {
                         "query": {
                             "type": "string",
-                            "description": "Natural language search query"
+                            "description": "Natural language search query. Qualifiers like 'in mobile', 'for auth', 'dart' are auto-detected."
+                        },
+                        "tiered": {
+                            "type": "boolean",
+                            "default": False,
+                            "description": "If true, search in priority order: memories (curated) > docs > code. Good for conceptual queries."
                         },
                         "namespace": {
                             "type": "string",

@@ -146,7 +161,7 @@ class RagtimeMCPServer:
                         },
                         "type": {
                             "type": "string",
-                            "description": "Filter by type (docs, code, architecture, etc.)"
+                            "description": "Filter by type (docs, code, architecture, etc.). Ignored if tiered=true."
                         },
                         "component": {
                             "type": "string",

@@ -155,7 +170,12 @@ class RagtimeMCPServer:
                         "require_terms": {
                             "type": "array",
                             "items": {"type": "string"},
-                            "description": "…
+                            "description": "Additional terms that MUST appear in results. Usually not needed since qualifiers are auto-detected."
+                        },
+                        "auto_extract": {
+                            "type": "boolean",
+                            "default": True,
+                            "description": "Auto-detect component qualifiers from query. Set to false for literal search."
                         },
                         "limit": {
                             "type": "integer",

@@ -287,6 +307,42 @@ class RagtimeMCPServer:
                     },
                     "required": ["memory_id", "status"]
                 }
+            },
+            {
+                "name": "record_feedback",
+                "description": "Record feedback when search results are used or referenced. Call this after using a search result to improve future rankings.",
+                "inputSchema": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "The original search query"
+                        },
+                        "result_file": {
+                            "type": "string",
+                            "description": "File path of the result that was used"
+                        },
+                        "action": {
+                            "type": "string",
+                            "enum": ["used", "referenced", "helpful", "not_helpful"],
+                            "default": "used",
+                            "description": "What happened with this result"
+                        },
+                        "position": {
+                            "type": "integer",
+                            "description": "Position in search results (1-indexed)"
+                        }
+                    },
+                    "required": ["query", "result_file"]
+                }
+            },
+            {
+                "name": "feedback_stats",
+                "description": "Get statistics about search result usage patterns",
+                "inputSchema": {
+                    "type": "object",
+                    "properties": {}
+                }
             }
         ]

@@ -308,6 +364,10 @@ class RagtimeMCPServer:
             return self._graduate(arguments)
         elif name == "update_status":
             return self._update_status(arguments)
+        elif name == "record_feedback":
+            return self._record_feedback(arguments)
+        elif name == "feedback_stats":
+            return self._feedback_stats(arguments)
         else:
             raise ValueError(f"Unknown tool: {name}")

@@ -338,23 +398,43 @@ class RagtimeMCPServer:
         }

     def _search(self, args: dict) -> dict:
-        """Search indexed content with …
-…
+        """Search indexed content with smart query understanding."""
+        if args.get("tiered", False):
+            # Tiered search: memories > docs > code
+            results = self.db.search_tiered(
+                query=args["query"],
+                limit=args.get("limit", 10),
+                namespace=args.get("namespace"),
+                require_terms=args.get("require_terms"),
+                auto_extract=args.get("auto_extract", True),
+                component=args.get("component"),
+            )
+        else:
+            results = self.db.search(
+                query=args["query"],
+                limit=args.get("limit", 10),
+                namespace=args.get("namespace"),
+                type_filter=args.get("type"),
+                component=args.get("component"),
+                require_terms=args.get("require_terms"),
+                auto_extract=args.get("auto_extract", True),
+            )
+
+        # Apply feedback-based boosts
+        boosts = self.feedback.get_boost_scores()
+        if boosts:
+            results = self.feedback.apply_boosts(results, boosts)

         return {
             "count": len(results),
+            "query": args["query"],
             "results": [
                 {
                     "content": r["content"],
                     "metadata": r["metadata"],
                     "score": 1 - r["distance"] if r["distance"] else None,
+                    "boosted": r.get("boosted", False),
+                    "tier": r.get("tier"),  # For tiered search
                 }
                 for r in results
             ]

@@ -479,6 +559,45 @@ class RagtimeMCPServer:
             "status": args["status"],
         }

+    def _record_feedback(self, args: dict) -> dict:
+        """Record feedback for a search result."""
+        feedback = SearchFeedback(
+            query=args["query"],
+            result_id="",  # We match by file path
+            result_file=args["result_file"],
+            action=args.get("action", "used"),
+            position=args.get("position", 0),
+        )
+
+        self.feedback.record(feedback)
+
+        return {
+            "success": True,
+            "query": args["query"],
+            "result_file": args["result_file"],
+            "action": feedback.action,
+        }
+
+    def _feedback_stats(self, args: dict) -> dict:
+        """Get feedback statistics."""
+        stats = self.feedback.get_usage_stats()
+        boosts = self.feedback.get_boost_scores()
+
+        # Get top boosted files
+        top_files = sorted(boosts.items(), key=lambda x: x[1], reverse=True)[:10]
+
+        return {
+            "total_feedback": stats["total"],
+            "results_used": stats["used"],
+            "results_ignored": stats["ignored"],
+            "helpful_count": stats["helpful"],
+            "not_helpful_count": stats["not_helpful"],
+            "avg_position_used": round(stats["avg_position_used"], 2),
+            "top_boosted_files": [
+                {"file": f, "boost": round(b, 2)} for f, b in top_files
+            ],
+        }
+
     def handle_message(self, message: dict) -> dict:
         """Handle an incoming JSON-RPC message."""
         method = message.get("method")

@@ -493,7 +612,7 @@ class RagtimeMCPServer:
             "protocolVersion": "2024-11-05",
             "serverInfo": {
                 "name": "ragtime",
-                "version": "0.2.…
+                "version": "0.2.15",
             },
             "capabilities": {
                 "tools": {},
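End to end, the new tools close the loop inside a single server. A direct-invocation sketch that bypasses the JSON-RPC framing (the project_path keyword is inferred from the constructor default shown above; in production these calls arrive as tools/call messages through handle_message):

    from pathlib import Path
    from src.mcp_server import RagtimeMCPServer

    server = RagtimeMCPServer(project_path=Path("."))

    # Tiered smart search: memories > docs > code.
    hits = server._search({"query": "error handling in mobile",
                           "tiered": True, "limit": 5})

    # Report which hit was actually useful...
    if hits["results"]:
        server._record_feedback({
            "query": "error handling in mobile",
            "result_file": hits["results"][0]["metadata"].get("file", ""),
            "action": "used",
            "position": 1,
        })

    # ...so later searches boost that file, and the loop stays inspectable.
    print(server._feedback_stats({}))

One caveat visible in the code above: get_usage_stats short-circuits to {"total": 0, "used": 0, "ignored": 0} when no feedback file exists yet, which lacks the keys _feedback_stats reads, so stats are only meaningful once at least one feedback entry has been recorded.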
Files without changes: {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt.