ragtime-cli 0.2.14__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ragtime-cli
- Version: 0.2.14
+ Version: 0.2.15
  Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
  Author-email: Bret Martineau <bretwardjames@gmail.com>
  License-Expression: MIT
@@ -1,9 +1,10 @@
- ragtime_cli-0.2.14.dist-info/licenses/LICENSE,sha256=9A0wJs2PRDciGRH4F8JUJ-aMKYQyq_gVu2ixrXs-l5A,1070
+ ragtime_cli-0.2.15.dist-info/licenses/LICENSE,sha256=9A0wJs2PRDciGRH4F8JUJ-aMKYQyq_gVu2ixrXs-l5A,1070
  src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- src/cli.py,sha256=RO9LIvZrJ8AG7RaTpR3FsYP2NsShGKJjjiomgfH7xYc,76926
+ src/cli.py,sha256=RmH3M9NvZcIO4sjYgrEJJrD-mn2mcK4dPyqaBxrhdeU,76984
  src/config.py,sha256=tQ6gPLr4ksn2bJPIUjtELFr-k01Eg4g-LDo3GNE6P0Q,4600
- src/db.py,sha256=KcDaaqqNMDnodD8zIWC-_y3OE-kQ0Iib1YQ1ChwtOH8,11590
- src/mcp_server.py,sha256=n7T5gtgySilLDLhtaYnbHxBhr6Ys70F4ZpIu3lLOvHM,21973
+ src/db.py,sha256=eWqFGrg3O6hve67EzRJGcAsIpYxWJo4JlrAtlZUUA_s,15169
+ src/feedback.py,sha256=cPw_lzusZZPvkgUxs_eV67NtV1FoCfTXUulBPnD78lo,6455
+ src/mcp_server.py,sha256=QHU8jtPdA-kEzoXj88ZM0XhFvwhIngKD8Ow4plvHBfM,26498
  src/memory.py,sha256=UiHyudKbseMMY-sdcaDSfVBMGj6sFXXw1GxBsZ7nuBc,18450
  src/commands/audit.md,sha256=Xkucm-gfBIMalK9wf7NBbyejpsqBTUAGGlb7GxMtMPY,5137
  src/commands/create-pr.md,sha256=u6-jVkDP_6bJQp6ImK039eY9F6B9E2KlAVlvLY-WV6Q,9483
@@ -17,9 +18,9 @@ src/commands/save.md,sha256=7gTpW46AU9Y4l8XVZ8f4h1sEdBfVqIRA7hlidUxMAC4,251
  src/commands/start.md,sha256=qoqhkMgET74DBx8YPIT1-wqCiVBUDxlmevigsCinHSY,6506
  src/indexers/__init__.py,sha256=MYoCPZUpHakMX1s2vWnc9shjWfx_X1_0JzUhpKhnKUQ,454
  src/indexers/code.py,sha256=G2TbiKbWj0e7DV5KsU8-Ggw6ziDb4zTuZ4Bu3ryV4g8,18059
- src/indexers/docs.py,sha256=nyewQ4Ug4SCuhne4TuLDlUDzz9GH2STInddj81ocz50,3555
- ragtime_cli-0.2.14.dist-info/METADATA,sha256=n_VCE2tgWEFA7fIDEzZjYm8vy9kZPeAKgx9cu2Esbug,11269
- ragtime_cli-0.2.14.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- ragtime_cli-0.2.14.dist-info/entry_points.txt,sha256=cWLbeyMxZNbew-THS3bHXTpCRXt1EaUy5QUOXGXLjl4,75
- ragtime_cli-0.2.14.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
- ragtime_cli-0.2.14.dist-info/RECORD,,
+ src/indexers/docs.py,sha256=Q8krHYw0bybUyZaq1sJ0r6Fv-I_6BjTufhqI1eg_25s,9992
+ ragtime_cli-0.2.15.dist-info/METADATA,sha256=J0tETjffr7XYMo3VmUwtm6SqUUnsuPVkrNpw5VYcgd8,11269
+ ragtime_cli-0.2.15.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ ragtime_cli-0.2.15.dist-info/entry_points.txt,sha256=cWLbeyMxZNbew-THS3bHXTpCRXt1EaUy5QUOXGXLjl4,75
+ ragtime_cli-0.2.15.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
+ ragtime_cli-0.2.15.dist-info/RECORD,,
src/cli.py CHANGED
@@ -381,13 +381,13 @@ def index(path: Path, index_type: str, clear: bool):
              item_show_func=lambda f: f.name[:30] if f else "",
          ) as files:
              for file_path in files:
-                 entry = index_doc_file(file_path)
-                 if entry:
-                     entries.append(entry)
+                 # index_doc_file returns list (hierarchical chunks)
+                 file_entries = index_doc_file(file_path)
+                 entries.extend(file_entries)

          if entries:
              _upsert_entries(db, entries, "docs")
-             click.echo(f" Indexed {len(entries)} documents")
+             click.echo(f" Indexed {len(entries)} document chunks")
          elif not to_delete:
              click.echo(" All docs up to date")
          else:
src/db.py CHANGED
@@ -238,6 +238,118 @@ class RagtimeDB:

          return output

+     def search_tiered(
+         self,
+         query: str,
+         limit: int = 10,
+         namespace: str | None = None,
+         require_terms: list[str] | None = None,
+         auto_extract: bool = True,
+         **filters,
+     ) -> list[dict]:
+         """
+         Tiered search: prioritizes memories > docs > code.
+ 
+         Searches in priority order, filling up to limit:
+         1. Memories (curated, high-signal knowledge)
+         2. Documentation (indexed markdown)
+         3. Code (broadest, implementation details)
+ 
+         Args:
+             query: Natural language search query
+             limit: Max total results to return
+             namespace: Filter by namespace
+             require_terms: Terms that MUST appear in results
+             auto_extract: Auto-detect qualifiers from query
+             **filters: Additional metadata filters
+ 
+         Returns:
+             List of dicts with 'content', 'metadata', 'distance', 'tier'
+         """
+         results = []
+ 
+         # Tier 1: Memories (not docs or code)
+         memory_results = self._search_tier(
+             query=query,
+             tier_name="memory",
+             exclude_types=["docs", "code"],
+             limit=limit,
+             namespace=namespace,
+             require_terms=require_terms,
+             auto_extract=auto_extract,
+             **filters,
+         )
+         results.extend(memory_results)
+ 
+         # Tier 2: Documentation
+         if len(results) < limit:
+             doc_results = self._search_tier(
+                 query=query,
+                 tier_name="docs",
+                 type_filter="docs",
+                 limit=limit - len(results),
+                 namespace=namespace,
+                 require_terms=require_terms,
+                 auto_extract=auto_extract,
+                 **filters,
+             )
+             results.extend(doc_results)
+ 
+         # Tier 3: Code
+         if len(results) < limit:
+             code_results = self._search_tier(
+                 query=query,
+                 tier_name="code",
+                 type_filter="code",
+                 limit=limit - len(results),
+                 namespace=namespace,
+                 require_terms=require_terms,
+                 auto_extract=auto_extract,
+                 **filters,
+             )
+             results.extend(code_results)
+ 
+         return results
+ 
+     def _search_tier(
+         self,
+         query: str,
+         tier_name: str,
+         limit: int,
+         type_filter: str | None = None,
+         exclude_types: list[str] | None = None,
+         **kwargs,
+     ) -> list[dict]:
+         """Search a single tier and tag results."""
+         # Build where clause for exclusion if needed
+         if exclude_types:
+             # Search without type filter, then exclude in post-processing
+             results = self.search(
+                 query=query,
+                 limit=limit * 2,  # fetch more since we'll filter
+                 type_filter=None,
+                 **kwargs,
+             )
+             # Filter out excluded types
+             filtered = []
+             for r in results:
+                 if r["metadata"].get("type") not in exclude_types:
+                     r["tier"] = tier_name
+                     filtered.append(r)
+                     if len(filtered) >= limit:
+                         break
+             return filtered
+         else:
+             results = self.search(
+                 query=query,
+                 limit=limit,
+                 type_filter=type_filter,
+                 **kwargs,
+             )
+             for r in results:
+                 r["tier"] = tier_name
+             return results
+ 
      def delete(self, ids: list[str]) -> None:
          """Delete documents by ID."""
          self.collection.delete(ids=ids)
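For reference, a minimal sketch of how the new tiered search could be called; the RagtimeDB constructor arguments are an assumption (only search_tiered and its parameters appear in this diff):

from src.db import RagtimeDB

db = RagtimeDB()  # assumption: real setup likely points at the project's index/collection
results = db.search_tiered("how does auth middleware work", limit=5)
for r in results:
    # Each result is tagged with the tier it came from: "memory", "docs", or "code"
    print(r["tier"], r["distance"], r["metadata"].get("file"))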
src/feedback.py ADDED
@@ -0,0 +1,202 @@
+ """
+ Feedback loop for RAG result quality improvement.
+ 
+ Tracks which search results are actually used/referenced by Claude,
+ enabling re-ranking and quality improvements over time.
+ """
+ 
+ import json
+ from pathlib import Path
+ from dataclasses import dataclass, field, asdict
+ from datetime import datetime
+ from typing import Optional
+ 
+ 
+ @dataclass
+ class SearchFeedback:
+     """Feedback for a single search result."""
+     query: str
+     result_id: str  # ChromaDB document ID
+     result_file: str  # File path for easier debugging
+     action: str  # "used", "referenced", "ignored", "helpful", "not_helpful"
+     timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+     session_id: Optional[str] = None  # Group related searches
+     position: int = 0  # Position in results (1-indexed)
+     distance: float = 0.0  # Original semantic distance
+ 
+ 
+ class FeedbackStore:
+     """
+     Simple file-based feedback storage.
+ 
+     Stores feedback as JSON lines for easy analysis.
+     Can be upgraded to SQLite or ChromaDB later.
+     """
+ 
+     def __init__(self, path: Path):
+         """
+         Initialize feedback store.
+ 
+         Args:
+             path: Directory to store feedback data
+         """
+         self.path = path
+         self.feedback_file = path / "feedback.jsonl"
+         self.stats_file = path / "feedback_stats.json"
+         path.mkdir(parents=True, exist_ok=True)
+ 
+     def record(self, feedback: SearchFeedback) -> None:
+         """Record a single feedback entry."""
+         with open(self.feedback_file, "a") as f:
+             f.write(json.dumps(asdict(feedback)) + "\n")
+ 
+     def record_usage(
+         self,
+         query: str,
+         result_id: str,
+         result_file: str,
+         position: int = 0,
+         distance: float = 0.0,
+         session_id: Optional[str] = None,
+     ) -> None:
+         """Convenience method to record when a result is used."""
+         self.record(SearchFeedback(
+             query=query,
+             result_id=result_id,
+             result_file=result_file,
+             action="used",
+             position=position,
+             distance=distance,
+             session_id=session_id,
+         ))
+ 
+     def record_batch(
+         self,
+         query: str,
+         used_ids: list[str],
+         all_results: list[dict],
+         session_id: Optional[str] = None,
+     ) -> None:
+         """
+         Record feedback for a batch of results.
+ 
+         Marks used_ids as "used" and others as "ignored".
+         """
+         used_set = set(used_ids)
+ 
+         for i, result in enumerate(all_results):
+             result_id = result.get("id", "")
+             result_file = result.get("metadata", {}).get("file", "")
+             distance = result.get("distance", 0.0)
+ 
+             action = "used" if result_id in used_set else "ignored"
+ 
+             self.record(SearchFeedback(
+                 query=query,
+                 result_id=result_id,
+                 result_file=result_file,
+                 action=action,
+                 position=i + 1,
+                 distance=distance,
+                 session_id=session_id,
+             ))
+ 
+     def get_usage_stats(self) -> dict:
+         """
+         Get aggregated usage statistics.
+ 
+         Returns:
+             Dict with usage counts, popular files, etc.
+         """
+         if not self.feedback_file.exists():
+             return {"total": 0, "used": 0, "ignored": 0}
+ 
+         stats = {
+             "total": 0,
+             "used": 0,
+             "ignored": 0,
+             "helpful": 0,
+             "not_helpful": 0,
+             "files_used": {},  # file -> count
+             "avg_position_used": 0.0,
+         }
+ 
+         positions = []
+ 
+         with open(self.feedback_file) as f:
+             for line in f:
+                 if not line.strip():
+                     continue
+                 try:
+                     entry = json.loads(line)
+                     stats["total"] += 1
+                     action = entry.get("action", "")
+ 
+                     if action == "used":
+                         stats["used"] += 1
+                         positions.append(entry.get("position", 0))
+                         file_path = entry.get("result_file", "")
+                         stats["files_used"][file_path] = stats["files_used"].get(file_path, 0) + 1
+                     elif action == "ignored":
+                         stats["ignored"] += 1
+                     elif action == "helpful":
+                         stats["helpful"] += 1
+                     elif action == "not_helpful":
+                         stats["not_helpful"] += 1
+                 except json.JSONDecodeError:
+                     continue
+ 
+         if positions:
+             stats["avg_position_used"] = sum(positions) / len(positions)
+ 
+         return stats
+ 
+     def get_boost_scores(self) -> dict[str, float]:
+         """
+         Calculate boost scores for files based on historical usage.
+ 
+         Returns:
+             Dict mapping file paths to boost multipliers (1.0 = no boost).
+         """
+         stats = self.get_usage_stats()
+         files_used = stats.get("files_used", {})
+ 
+         if not files_used:
+             return {}
+ 
+         # Normalize to 0-1 range, then convert to boost multiplier
+         max_count = max(files_used.values())
+         boosts = {}
+ 
+         for file_path, count in files_used.items():
+             # Boost range: 1.0 (no boost) to 1.5 (50% boost for most-used)
+             normalized = count / max_count
+             boosts[file_path] = 1.0 + (normalized * 0.5)
+ 
+         return boosts
+ 
+     def apply_boosts(self, results: list[dict], boosts: dict[str, float]) -> list[dict]:
+         """
+         Apply historical boost scores to search results.
+ 
+         Adjusts distances based on historical usage patterns.
+         Lower distance = more relevant, so we divide by boost.
+         """
+         if not boosts:
+             return results
+ 
+         for result in results:
+             file_path = result.get("metadata", {}).get("file", "")
+             boost = boosts.get(file_path, 1.0)
+             if "distance" in result and result["distance"]:
+                 # Reduce distance for frequently-used files
+                 result["distance"] = result["distance"] / boost
+                 result["boosted"] = boost > 1.0
+ 
+         # Re-sort by adjusted distance
+         return sorted(results, key=lambda r: r.get("distance", float("inf")))
+ 
+     def clear(self) -> None:
+         """Clear all feedback data."""
+         if self.feedback_file.exists():
+             self.feedback_file.unlink()
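For reference, a short sketch of the FeedbackStore API added above; the storage path is hypothetical, the calls are the ones defined in the new module:

from pathlib import Path
from src.feedback import FeedbackStore, SearchFeedback

store = FeedbackStore(Path("/tmp/ragtime-feedback"))  # hypothetical location
store.record(SearchFeedback(
    query="token refresh flow",
    result_id="doc-123",
    result_file="docs/auth.md",
    action="used",
    position=1,
    distance=0.42,
))
print(store.get_usage_stats())   # totals per action, files_used counts, avg_position_used
print(store.get_boost_scores())  # most-used files approach a 1.5x boost multiplier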
src/indexers/docs.py CHANGED
@@ -21,6 +21,10 @@ class DocEntry:
      component: str | None = None
      title: str | None = None
      mtime: float | None = None  # File modification time for incremental indexing
+     # Hierarchical chunking fields
+     section_path: str | None = None  # e.g., "Installation > Configuration > Environment Variables"
+     section_level: int = 0  # Header depth (0=whole doc, 1=h1, 2=h2, etc.)
+     chunk_index: int = 0  # Position within file (for stable IDs)

      def to_metadata(self) -> dict:
          """Convert to ChromaDB metadata dict."""
@@ -32,6 +36,8 @@ class DocEntry:
              "component": self.component or "",
              "title": self.title or Path(self.file_path).stem,
              "mtime": self.mtime or 0.0,
+             "section_path": self.section_path or "",
+             "section_level": self.section_level,
          }


@@ -56,33 +62,200 @@ def parse_frontmatter(content: str) -> tuple[dict, str]:
      return {}, content


- def index_file(file_path: Path) -> DocEntry | None:
+ @dataclass
+ class Section:
+     """A markdown section for hierarchical chunking."""
+     title: str
+     level: int  # 1-6 for h1-h6
+     content: str
+     line_start: int
+     parent_path: list[str]  # Parent headers for context
+ 
+ 
+ def chunk_by_headers(
+     content: str,
+     min_chunk_size: int = 100,
+     max_chunk_size: int = 2000,
+ ) -> list[Section]:
+     """
+     Split markdown into sections by headers, preserving hierarchy.
+ 
+     Args:
+         content: Markdown body (without frontmatter)
+         min_chunk_size: Minimum chars to make a standalone section
+         max_chunk_size: Maximum chars before splitting further
+ 
+     Returns:
+         List of Section objects with hierarchical context
+     """
+     lines = content.split('\n')
+     sections: list[Section] = []
+     header_stack: list[tuple[int, str]] = []  # (level, title) for building paths
+ 
+     current_section_lines: list[str] = []
+     current_section_start = 0
+     current_title = ""
+     current_level = 0
+ 
+     def flush_section():
+         """Save accumulated lines as a section."""
+         nonlocal current_section_lines, current_section_start, current_title, current_level
+ 
+         text = '\n'.join(current_section_lines).strip()
+         if text:
+             # Build parent path from stack (excluding current)
+             parent_path = [h[1] for h in header_stack[:-1]] if header_stack else []
+ 
+             sections.append(Section(
+                 title=current_title or "Introduction",
+                 level=current_level,
+                 content=text,
+                 line_start=current_section_start,
+                 parent_path=parent_path,
+             ))
+         current_section_lines = []
+ 
+     for i, line in enumerate(lines):
+         # Detect markdown headers
+         header_match = re.match(r'^(#{1,6})\s+(.+)$', line)
+ 
+         if header_match:
+             # Save previous section
+             flush_section()
+ 
+             level = len(header_match.group(1))
+             title = header_match.group(2).strip()
+ 
+             # Update header stack - pop headers at same or lower level
+             while header_stack and header_stack[-1][0] >= level:
+                 header_stack.pop()
+             header_stack.append((level, title))
+ 
+             current_title = title
+             current_level = level
+             current_section_start = i
+             current_section_lines = [line]  # Include header in content
+         else:
+             current_section_lines.append(line)
+ 
+     # Don't forget the last section
+     flush_section()
+ 
+     # Post-process: merge tiny sections into parents, split huge ones
+     processed: list[Section] = []
+     for section in sections:
+         if len(section.content) < min_chunk_size and processed:
+             # Merge into previous section
+             processed[-1].content += '\n\n' + section.content
+         elif len(section.content) > max_chunk_size:
+             # Split by paragraphs
+             paragraphs = re.split(r'\n\n+', section.content)
+             current_chunk = ""
+             chunk_num = 0
+ 
+             for para in paragraphs:
+                 if len(current_chunk) + len(para) > max_chunk_size and current_chunk:
+                     processed.append(Section(
+                         title=f"{section.title} (part {chunk_num + 1})",
+                         level=section.level,
+                         content=current_chunk.strip(),
+                         line_start=section.line_start,
+                         parent_path=section.parent_path,
+                     ))
+                     current_chunk = para
+                     chunk_num += 1
+                 else:
+                     current_chunk += '\n\n' + para if current_chunk else para
+ 
+             if current_chunk.strip():
+                 title = f"{section.title} (part {chunk_num + 1})" if chunk_num > 0 else section.title
+                 processed.append(Section(
+                     title=title,
+                     level=section.level,
+                     content=current_chunk.strip(),
+                     line_start=section.line_start,
+                     parent_path=section.parent_path,
+                 ))
+         else:
+             processed.append(section)
+ 
+     return processed
+ 
+ 
+ def index_file(file_path: Path, hierarchical: bool = True) -> list[DocEntry]:
      """
-     Parse a single markdown file into a DocEntry.
+     Parse a single markdown file into DocEntry objects.
+ 
+     Args:
+         file_path: Path to the markdown file
+         hierarchical: If True, chunk by headers for better semantic search.
+             If False, return whole file as single entry.

-     Returns None if file can't be parsed.
+     Returns:
+         List of DocEntry objects (one per section if hierarchical, else one for whole file).
+         Empty list if file can't be parsed.
      """
      try:
          content = file_path.read_text(encoding='utf-8')
          mtime = os.path.getmtime(file_path)
      except (IOError, UnicodeDecodeError, OSError):
-         return None
+         return []

      metadata, body = parse_frontmatter(content)

      # Skip empty documents
      if not body.strip():
-         return None
+         return []

-     return DocEntry(
-         content=body.strip(),
-         file_path=str(file_path),
-         namespace=metadata.get("namespace"),
-         category=metadata.get("category"),
-         component=metadata.get("component"),
-         title=metadata.get("title"),
-         mtime=mtime,
-     )
+     # Base metadata from frontmatter
+     base_namespace = metadata.get("namespace")
+     base_category = metadata.get("category")
+     base_component = metadata.get("component")
+     base_title = metadata.get("title") or file_path.stem
+ 
+     # Short docs: return as single entry
+     if not hierarchical or len(body) < 500:
+         return [DocEntry(
+             content=body.strip(),
+             file_path=str(file_path),
+             namespace=base_namespace,
+             category=base_category,
+             component=base_component,
+             title=base_title,
+             mtime=mtime,
+             section_path="",
+             section_level=0,
+             chunk_index=0,
+         )]
+ 
+     # Hierarchical chunking for longer docs
+     sections = chunk_by_headers(body)
+     entries = []
+ 
+     for i, section in enumerate(sections):
+         # Build full section path: "Parent > Child > Current"
+         path_parts = section.parent_path + [section.title]
+         section_path = " > ".join(path_parts)
+ 
+         # Prepend context for better embeddings
+         context_prefix = f"# {base_title}\n"
+         if section.parent_path:
+             context_prefix += f"Section: {' > '.join(section.parent_path)}\n\n"
+ 
+         entries.append(DocEntry(
+             content=context_prefix + section.content,
+             file_path=str(file_path),
+             namespace=base_namespace,
+             category=base_category,
+             component=base_component,
+             title=section.title,
+             mtime=mtime,
+             section_path=section_path,
+             section_level=section.level,
+             chunk_index=i,
+         ))
+ 
+     return entries


  def discover_docs(
@@ -117,18 +290,23 @@ def discover_docs(
      return files


- def index_directory(root: Path, **kwargs) -> list[DocEntry]:
+ def index_directory(root: Path, hierarchical: bool = True, **kwargs) -> list[DocEntry]:
      """
      Index all markdown files in a directory.

-     Returns list of DocEntry objects ready for vector DB.
+     Args:
+         root: Directory to search
+         hierarchical: If True, chunk long docs by headers
+         **kwargs: Passed to discover_docs (patterns, exclude)
+ 
+     Returns:
+         List of DocEntry objects ready for vector DB.
      """
      files = discover_docs(root, **kwargs)
      entries = []

      for file_path in files:
-         entry = index_file(file_path)
-         if entry:
-             entries.append(entry)
+         file_entries = index_file(file_path, hierarchical=hierarchical)
+         entries.extend(file_entries)

      return entries
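For reference, a sketch of the hierarchical chunking added above; the sample markdown is invented, while chunk_by_headers and the Section fields come from this diff:

from src.indexers.docs import chunk_by_headers

sample = """# Guide
Intro paragraph with enough text to stand alone as its own chunk of the guide.

## Installation
Steps for installing the tool, also long enough to survive the minimum-size merge.
"""
for section in chunk_by_headers(sample, min_chunk_size=10):
    # parent_path holds the enclosing headers, e.g. ["Guide"] for the Installation section
    print(section.level, section.parent_path, section.title)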
src/mcp_server.py CHANGED
@@ -13,6 +13,7 @@ from typing import Any

  from .db import RagtimeDB
  from .memory import Memory, MemoryStore
+ from .feedback import FeedbackStore, SearchFeedback


  class RagtimeMCPServer:
@@ -28,6 +29,7 @@ class RagtimeMCPServer:
          self.project_path = project_path or Path.cwd()
          self._db = None
          self._store = None
+         self._feedback = None

      @property
      def db(self) -> RagtimeDB:
@@ -44,6 +46,14 @@ class RagtimeMCPServer:
              self._store = MemoryStore(self.project_path, self.db)
          return self._store

+     @property
+     def feedback(self) -> FeedbackStore:
+         """Lazy-load the feedback store."""
+         if self._feedback is None:
+             feedback_path = self.project_path / ".ragtime" / "feedback"
+             self._feedback = FeedbackStore(feedback_path)
+         return self._feedback
+ 
      def get_author(self) -> str:
          """Get the current developer's username."""
          try:
@@ -132,13 +142,18 @@ class RagtimeMCPServer:
              },
              {
                  "name": "search",
-                 "description": "Smart hybrid search over indexed code and docs. Auto-detects qualifiers like 'mobile', 'auth', 'dart' in your query and ensures they appear in results. Returns function signatures, class definitions, and doc summaries with file paths and line numbers. IMPORTANT: Results are summaries only - use the Read tool on returned file paths to see full implementations.",
+                 "description": "Smart hybrid search over indexed content. Auto-detects qualifiers like 'mobile', 'auth', 'dart' and ensures they appear in results. Use tiered=true for priority ordering (memories > docs > code). Returns summaries with file paths - use Read tool for full implementations.",
                  "inputSchema": {
                      "type": "object",
                      "properties": {
                          "query": {
                              "type": "string",
-                             "description": "Natural language search query. Qualifiers like 'in mobile', 'for auth', 'dart' are auto-detected and used for filtering."
+                             "description": "Natural language search query. Qualifiers like 'in mobile', 'for auth', 'dart' are auto-detected."
+                         },
+                         "tiered": {
+                             "type": "boolean",
+                             "default": False,
+                             "description": "If true, search in priority order: memories (curated) > docs > code. Good for conceptual queries."
                          },
                          "namespace": {
                              "type": "string",
@@ -146,7 +161,7 @@ class RagtimeMCPServer:
                          },
                          "type": {
                              "type": "string",
-                             "description": "Filter by type (docs, code, architecture, etc.)"
+                             "description": "Filter by type (docs, code, architecture, etc.). Ignored if tiered=true."
                          },
                          "component": {
                              "type": "string",
@@ -155,12 +170,12 @@ class RagtimeMCPServer:
                          "require_terms": {
                              "type": "array",
                              "items": {"type": "string"},
-                             "description": "Additional terms that MUST appear in results. Usually not needed since qualifiers are auto-detected from the query."
+                             "description": "Additional terms that MUST appear in results. Usually not needed since qualifiers are auto-detected."
                          },
                          "auto_extract": {
                              "type": "boolean",
                              "default": True,
-                             "description": "Auto-detect component qualifiers from query (default: true). Set to false for literal/raw search."
+                             "description": "Auto-detect component qualifiers from query. Set to false for literal search."
                          },
                          "limit": {
                              "type": "integer",
@@ -292,6 +307,42 @@ class RagtimeMCPServer:
                      },
                      "required": ["memory_id", "status"]
                  }
+             },
+             {
+                 "name": "record_feedback",
+                 "description": "Record feedback when search results are used or referenced. Call this after using a search result to improve future rankings.",
+                 "inputSchema": {
+                     "type": "object",
+                     "properties": {
+                         "query": {
+                             "type": "string",
+                             "description": "The original search query"
+                         },
+                         "result_file": {
+                             "type": "string",
+                             "description": "File path of the result that was used"
+                         },
+                         "action": {
+                             "type": "string",
+                             "enum": ["used", "referenced", "helpful", "not_helpful"],
+                             "default": "used",
+                             "description": "What happened with this result"
+                         },
+                         "position": {
+                             "type": "integer",
+                             "description": "Position in search results (1-indexed)"
+                         }
+                     },
+                     "required": ["query", "result_file"]
+                 }
+             },
+             {
+                 "name": "feedback_stats",
+                 "description": "Get statistics about search result usage patterns",
+                 "inputSchema": {
+                     "type": "object",
+                     "properties": {}
+                 }
              }
          ]

@@ -313,6 +364,10 @@ class RagtimeMCPServer:
              return self._graduate(arguments)
          elif name == "update_status":
              return self._update_status(arguments)
+         elif name == "record_feedback":
+             return self._record_feedback(arguments)
+         elif name == "feedback_stats":
+             return self._feedback_stats(arguments)
          else:
              raise ValueError(f"Unknown tool: {name}")

@@ -344,23 +399,42 @@ class RagtimeMCPServer:

      def _search(self, args: dict) -> dict:
          """Search indexed content with smart query understanding."""
-         results = self.db.search(
-             query=args["query"],
-             limit=args.get("limit", 10),
-             namespace=args.get("namespace"),
-             type_filter=args.get("type"),
-             component=args.get("component"),
-             require_terms=args.get("require_terms"),
-             auto_extract=args.get("auto_extract", True),
-         )
+         if args.get("tiered", False):
+             # Tiered search: memories > docs > code
+             results = self.db.search_tiered(
+                 query=args["query"],
+                 limit=args.get("limit", 10),
+                 namespace=args.get("namespace"),
+                 require_terms=args.get("require_terms"),
+                 auto_extract=args.get("auto_extract", True),
+                 component=args.get("component"),
+             )
+         else:
+             results = self.db.search(
+                 query=args["query"],
+                 limit=args.get("limit", 10),
+                 namespace=args.get("namespace"),
+                 type_filter=args.get("type"),
+                 component=args.get("component"),
+                 require_terms=args.get("require_terms"),
+                 auto_extract=args.get("auto_extract", True),
+             )
+ 
+         # Apply feedback-based boosts
+         boosts = self.feedback.get_boost_scores()
+         if boosts:
+             results = self.feedback.apply_boosts(results, boosts)

          return {
              "count": len(results),
+             "query": args["query"],
              "results": [
                  {
                      "content": r["content"],
                      "metadata": r["metadata"],
                      "score": 1 - r["distance"] if r["distance"] else None,
+                     "boosted": r.get("boosted", False),
+                     "tier": r.get("tier"),  # For tiered search
                  }
                  for r in results
              ]
@@ -485,6 +559,45 @@ class RagtimeMCPServer:
              "status": args["status"],
          }

+     def _record_feedback(self, args: dict) -> dict:
+         """Record feedback for a search result."""
+         feedback = SearchFeedback(
+             query=args["query"],
+             result_id="",  # We match by file path
+             result_file=args["result_file"],
+             action=args.get("action", "used"),
+             position=args.get("position", 0),
+         )
+ 
+         self.feedback.record(feedback)
+ 
+         return {
+             "success": True,
+             "query": args["query"],
+             "result_file": args["result_file"],
+             "action": feedback.action,
+         }
+ 
+     def _feedback_stats(self, args: dict) -> dict:
+         """Get feedback statistics."""
+         stats = self.feedback.get_usage_stats()
+         boosts = self.feedback.get_boost_scores()
+ 
+         # Get top boosted files
+         top_files = sorted(boosts.items(), key=lambda x: x[1], reverse=True)[:10]
+ 
+         return {
+             "total_feedback": stats["total"],
+             "results_used": stats["used"],
+             "results_ignored": stats["ignored"],
+             "helpful_count": stats["helpful"],
+             "not_helpful_count": stats["not_helpful"],
+             "avg_position_used": round(stats["avg_position_used"], 2),
+             "top_boosted_files": [
+                 {"file": f, "boost": round(b, 2)} for f, b in top_files
+             ],
+         }
+ 
      def handle_message(self, message: dict) -> dict:
          """Handle an incoming JSON-RPC message."""
          method = message.get("method")
@@ -499,7 +612,7 @@ class RagtimeMCPServer:
                          "protocolVersion": "2024-11-05",
                          "serverInfo": {
                              "name": "ragtime",
-                             "version": "0.2.14",
+                             "version": "0.2.15",
                          },
                          "capabilities": {
                              "tools": {},