@ebowwa/claudecodehistory 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,291 @@
1
+ //! Full-text search module using Tantivy
2
+ //!
3
+ //! Provides high-performance indexed search for Claude Code conversation history.
4
+ //! Replaces linear JSONL scans with BM25-ranked full-text search.
5
+
6
+ mod schema;
7
+ mod index;
8
+ mod query;
9
+
10
+ pub use schema::*;
11
+ pub use index::*;
12
+ pub use query::*;
13
+
14
+ use std::path::PathBuf;
15
+ use std::sync::{Arc, RwLock};
16
+ use tantivy::schema::Schema;
17
+ use tantivy::{Index, TantivyDocument};
18
+
19
+ use crate::types::*;
20
+
21
+ /// Search index for Claude Code conversation history
22
+ pub struct HistorySearchIndex {
23
+ index_path: PathBuf,
24
+ index: Arc<RwLock<Option<Index>>>,
25
+ schema: Schema,
26
+ }
27
+
28
+ impl HistorySearchIndex {
29
+ /// Create or open a search index at the specified path
30
+ pub fn new(index_path: impl Into<PathBuf>) -> Result<Self, SearchError> {
31
+ let path = index_path.into();
32
+ let schema = create_history_schema();
33
+
34
+ let index = if path.exists() {
35
+ Index::open_in_dir(&path).map_err(|e| SearchError::IndexOpen(e.to_string()))?
36
+ } else {
37
+ std::fs::create_dir_all(&path).map_err(|e| SearchError::IndexCreate(e.to_string()))?;
38
+ Index::create_in_dir(&path, schema.clone()).map_err(|e| SearchError::IndexCreate(e.to_string()))?
39
+ };
40
+
41
+ Ok(Self {
42
+ index_path: path,
43
+ index: Arc::new(RwLock::new(Some(index))),
44
+ schema,
45
+ })
46
+ }
47
+
48
+ /// Index a conversation entry
49
+ pub fn index_entry(&self, entry: &ConversationEntry) -> Result<(), SearchError> {
50
+ let index_guard = self.index.read()
51
+ .map_err(|e| SearchError::Lock(e.to_string()))?;
52
+
53
+ let index = index_guard.as_ref()
54
+ .ok_or_else(|| SearchError::IndexNotInitialized)?;
55
+
56
+ let mut writer: tantivy::IndexWriter<TantivyDocument> = index.writer(50_000_000)
57
+ .map_err(|e| SearchError::Writer(e.to_string()))?;
58
+
59
+ add_entry_to_index(&mut writer, &self.schema, entry)?;
60
+
61
+ writer.commit().map_err(|e| SearchError::Commit(e.to_string()))?;
62
+ Ok(())
63
+ }
64
+
65
+ /// Index multiple entries in batch
66
+ pub fn index_entries(&self, entries: &[ConversationEntry]) -> Result<usize, SearchError> {
67
+ let index_guard = self.index.read()
68
+ .map_err(|e| SearchError::Lock(e.to_string()))?;
69
+
70
+ let index = index_guard.as_ref()
71
+ .ok_or_else(|| SearchError::IndexNotInitialized)?;
72
+
73
+ let mut writer: tantivy::IndexWriter<TantivyDocument> = index.writer(50_000_000)
74
+ .map_err(|e| SearchError::Writer(e.to_string()))?;
75
+
76
+ let mut count = 0;
77
+ for entry in entries {
78
+ if add_entry_to_index(&mut writer, &self.schema, entry).is_ok() {
79
+ count += 1;
80
+ }
81
+ }
82
+
83
+ writer.commit().map_err(|e| SearchError::Commit(e.to_string()))?;
84
+ Ok(count)
85
+ }
86
+
87
+ /// Search the index with a text query
88
+ pub fn search(&self, query: &str, options: IndexedSearchOptions) -> Result<SearchResults, SearchError> {
89
+ let index_guard = self.index.read()
90
+ .map_err(|e| SearchError::Lock(e.to_string()))?;
91
+
92
+ let index = index_guard.as_ref()
93
+ .ok_or_else(|| SearchError::IndexNotInitialized)?;
94
+
95
+ execute_search(index, &self.schema, query, options)
96
+ }
97
+
98
+ /// Advanced search with filters
99
+ pub fn advanced_search(&self, options: AdvancedSearchOptions) -> Result<SearchResults, SearchError> {
100
+ let index_guard = self.index.read()
101
+ .map_err(|e| SearchError::Lock(e.to_string()))?;
102
+
103
+ let index = index_guard.as_ref()
104
+ .ok_or_else(|| SearchError::IndexNotInitialized)?;
105
+
106
+ execute_advanced_search(index, &self.schema, options)
107
+ }
108
+
109
+ /// Delete all entries for a session
110
+ pub fn delete_session(&self, session_id: &str) -> Result<(), SearchError> {
111
+ let index_guard = self.index.read()
112
+ .map_err(|e| SearchError::Lock(e.to_string()))?;
113
+
114
+ let index = index_guard.as_ref()
115
+ .ok_or_else(|| SearchError::IndexNotInitialized)?;
116
+
117
+ let mut writer = index.writer(50_000_000)
118
+ .map_err(|e| SearchError::Writer(e.to_string()))?;
119
+
120
+ delete_session_from_index(&mut writer, &self.schema, session_id)?;
121
+
122
+ writer.commit().map_err(|e| SearchError::Commit(e.to_string()))?;
123
+ Ok(())
124
+ }
125
+
126
+ /// Clear the entire index
127
+ pub fn clear(&self) -> Result<(), SearchError> {
128
+ let index_guard = self.index.read()
129
+ .map_err(|e| SearchError::Lock(e.to_string()))?;
130
+
131
+ let index = index_guard.as_ref()
132
+ .ok_or_else(|| SearchError::IndexNotInitialized)?;
133
+
134
+ let mut writer: tantivy::IndexWriter<TantivyDocument> = index.writer(50_000_000)
135
+ .map_err(|e| SearchError::Writer(e.to_string()))?;
136
+
137
+ writer.delete_all_documents().map_err(|e| SearchError::Commit(e.to_string()))?;
138
+ writer.commit().map_err(|e| SearchError::Commit(e.to_string()))?;
139
+
140
+ Ok(())
141
+ }
142
+
143
+ /// Optimize the index for better search performance
144
+ pub fn optimize(&self) -> Result<(), SearchError> {
145
+ let index_guard = self.index.read()
146
+ .map_err(|e| SearchError::Lock(e.to_string()))?;
147
+
148
+ let index = index_guard.as_ref()
149
+ .ok_or_else(|| SearchError::IndexNotInitialized)?;
150
+
151
+ let writer: tantivy::IndexWriter<TantivyDocument> = index.writer(50_000_000)
152
+ .map_err(|e| SearchError::Writer(e.to_string()))?;
153
+
154
+ writer.wait_merging_threads().map_err(|e| SearchError::Merge(e.to_string()))?;
155
+
156
+ Ok(())
157
+ }
158
+
159
+ /// Get index statistics
160
+ pub fn stats(&self) -> Result<IndexStats, SearchError> {
161
+ let index_guard = self.index.read()
162
+ .map_err(|e| SearchError::Lock(e.to_string()))?;
163
+
164
+ let index = index_guard.as_ref()
165
+ .ok_or_else(|| SearchError::IndexNotInitialized)?;
166
+
167
+ get_index_stats(index, &self.schema)
168
+ }
169
+
170
+ /// Get the index path
171
+ pub fn path(&self) -> &std::path::Path {
172
+ &self.index_path
173
+ }
174
+ }
175
+
176
+ /// Search error types
177
+ #[derive(Debug, thiserror::Error)]
178
+ pub enum SearchError {
179
+ #[error("Failed to open index: {0}")]
180
+ IndexOpen(String),
181
+
182
+ #[error("Failed to create index: {0}")]
183
+ IndexCreate(String),
184
+
185
+ #[error("Index not initialized")]
186
+ IndexNotInitialized,
187
+
188
+ #[error("Lock error: {0}")]
189
+ Lock(String),
190
+
191
+ #[error("Writer error: {0}")]
192
+ Writer(String),
193
+
194
+ #[error("Commit error: {0}")]
195
+ Commit(String),
196
+
197
+ #[error("Merge error: {0}")]
198
+ Merge(String),
199
+
200
+ #[error("Query error: {0}")]
201
+ Query(String),
202
+
203
+ #[error("Schema error: {0}")]
204
+ Schema(String),
205
+
206
+ #[error("IO error: {0}")]
207
+ Io(String),
208
+ }
209
+
210
/// Options accepted by the basic indexed text search.
///
/// Every field is optional; `Default` leaves all of them unset. How each one
/// is applied is decided by the query code, which is not part of this file.
#[derive(Debug, Clone, Default)]
pub struct IndexedSearchOptions {
    /// Limit on the number of results.
    pub limit: Option<usize>,
    /// Offset for pagination.
    pub offset: Option<usize>,
    /// Session ID to filter by.
    pub session_id: Option<String>,
    /// Project path to filter by.
    pub project_path: Option<String>,
    /// Message types to filter by.
    pub message_types: Option<Vec<String>>,
    /// Timezone name; presumably used for date handling — confirm against the query code.
    pub timezone: Option<String>,
}
220
+
221
/// Options for the filtered ("advanced") search.
///
/// Every field is optional; `Default` leaves all of them unset.
#[derive(Debug, Clone, Default)]
pub struct AdvancedSearchOptions {
    /// Optional text query; filters can be used without any text search.
    pub query: Option<String>,
    /// Limit on the number of results.
    pub limit: Option<usize>,
    /// Pagination offset.
    pub offset: Option<usize>,
    /// Restrict results to this session ID.
    pub session_id: Option<String>,
    /// Restrict results to this project path.
    pub project_path: Option<String>,
    /// Restrict results to these message types (user, assistant, system, result).
    pub message_types: Option<Vec<String>>,
    /// Start date of the filter range, as an ISO 8601 string.
    pub start_date: Option<String>,
    /// End date of the filter range, as an ISO 8601 string.
    pub end_date: Option<String>,
    /// Restrict the search to these fields only.
    pub fields: Option<Vec<String>>,
    /// Turn fuzzy matching on.
    pub fuzzy: Option<bool>,
    /// Threshold for the minimum match score.
    pub min_score: Option<f64>,
    /// Timezone applied to date filtering.
    pub timezone: Option<String>,
}
249
+
250
+ /// Search results
251
+ #[derive(Debug, Clone)]
252
+ pub struct SearchResults {
253
+ /// Matching entries
254
+ pub entries: Vec<SearchResultEntry>,
255
+ /// Total count (before pagination)
256
+ pub total: usize,
257
+ /// Query that was executed
258
+ pub query: String,
259
+ /// Execution time in milliseconds
260
+ pub took_ms: u64,
261
+ /// Index size in bytes
262
+ pub index_size_bytes: u64,
263
+ }
264
+
265
+ /// Single search result entry
266
+ #[derive(Debug, Clone)]
267
+ pub struct SearchResultEntry {
268
+ /// Conversation entry
269
+ pub entry: ConversationEntry,
270
+ /// Relevance score (BM25)
271
+ pub score: f64,
272
+ /// Highlighted snippets
273
+ pub highlights: Vec<String>,
274
+ }
275
+
276
/// Summary statistics describing the search index.
#[derive(Debug, Clone)]
pub struct IndexStats {
    /// Total document count.
    pub total_docs: usize,
    /// Count of user messages.
    pub user_messages: usize,
    /// Count of assistant messages.
    pub assistant_messages: usize,
    /// Count of distinct sessions.
    pub unique_sessions: usize,
    /// Count of distinct projects.
    pub unique_projects: usize,
    /// Size of the index, in bytes.
    pub index_size_bytes: u64,
}