@ebowwa/claudecodehistory 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,458 +0,0 @@
1
- //! Query execution for conversation history search
2
- //!
3
- //! Provides high-performance indexed search using Tantivy BM25 scoring.
4
-
5
- use tantivy::{
6
- collector::TopDocs,
7
- query::*,
8
- schema::{IndexRecordOption, Schema, Value},
9
- Index, Term,
10
- };
11
-
12
- use super::schema::{HistoryFields, extract_project_name, timestamp_to_ms};
13
- use super::{SearchError, IndexedSearchOptions, AdvancedSearchOptions, SearchResults, SearchResultEntry};
14
- use crate::types::{ConversationEntry, MessageType};
15
-
16
- /// Build a combined query from text query and optional filters
17
- pub fn build_combined_query(
18
- _schema: &Schema,
19
- fields: &HistoryFields,
20
- query: &str,
21
- options: &IndexedSearchOptions,
22
- ) -> Box<dyn tantivy::query::Query> {
23
- let mut clauses: Vec<(tantivy::query::Occur, Box<dyn tantivy::query::Query>)> = Vec::new();
24
-
25
- // Build text query from search terms
26
- let trimmed = query.trim();
27
- if !trimmed.is_empty() {
28
- // Split query into terms and create term queries for each
29
- let terms: Vec<&str> = trimmed.split_whitespace().collect();
30
-
31
- if !terms.is_empty() {
32
- // Use QueryParser for better text search
33
- let mut text_clauses: Vec<(tantivy::query::Occur, Box<dyn tantivy::query::Query>)> = Vec::new();
34
-
35
- for term in terms {
36
- let term_query = Box::new(TermQuery::new(
37
- Term::from_field_text(fields.all_text, term),
38
- IndexRecordOption::Basic,
39
- ));
40
- text_clauses.push((tantivy::query::Occur::Should, term_query));
41
- }
42
-
43
- // Combine terms with OR (any term can match)
44
- let text_query: Box<dyn tantivy::query::Query> = Box::new(BooleanQuery::new(text_clauses));
45
- clauses.push((tantivy::query::Occur::Must, text_query));
46
- }
47
- }
48
-
49
- // Add session filter
50
- if let Some(ref session_id) = options.session_id {
51
- let session_query = Box::new(TermQuery::new(
52
- Term::from_field_text(fields.session_id, session_id),
53
- IndexRecordOption::Basic,
54
- ));
55
- clauses.push((tantivy::query::Occur::Must, session_query));
56
- }
57
-
58
- // Add project path filter
59
- if let Some(ref project_path) = options.project_path {
60
- let project_query = Box::new(TermQuery::new(
61
- Term::from_field_text(fields.project_path, project_path),
62
- IndexRecordOption::Basic,
63
- ));
64
- clauses.push((tantivy::query::Occur::Must, project_query));
65
- }
66
-
67
- // Add message type filter
68
- if let Some(ref message_types) = options.message_types {
69
- if !message_types.is_empty() {
70
- let mut type_clauses: Vec<(tantivy::query::Occur, Box<dyn tantivy::query::Query>)> = Vec::new();
71
-
72
- for msg_type in message_types {
73
- let type_query = Box::new(TermQuery::new(
74
- Term::from_field_text(fields.message_type, msg_type),
75
- IndexRecordOption::Basic,
76
- ));
77
- type_clauses.push((tantivy::query::Occur::Should, type_query));
78
- }
79
-
80
- let types_query: Box<dyn tantivy::query::Query> = Box::new(BooleanQuery::new(type_clauses));
81
- clauses.push((tantivy::query::Occur::Must, types_query));
82
- }
83
- }
84
-
85
- // Return combined query or match-all
86
- if clauses.is_empty() {
87
- Box::new(AllQuery)
88
- } else {
89
- Box::new(BooleanQuery::new(clauses))
90
- }
91
- }
92
-
93
- /// Execute a search query and return results
94
- pub fn execute_search(
95
- index: &Index,
96
- schema: &Schema,
97
- query: &str,
98
- options: IndexedSearchOptions,
99
- ) -> Result<SearchResults, SearchError> {
100
- let start = std::time::Instant::now();
101
- let fields = HistoryFields::new(schema);
102
-
103
- let limit = options.limit.unwrap_or(10);
104
- let offset = options.offset.unwrap_or(0);
105
-
106
- // Build the query
107
- let combined_query = build_combined_query(schema, &fields, query, &options);
108
-
109
- // Create reader and searcher
110
- let reader = index.reader()
111
- .map_err(|e| SearchError::Query(e.to_string()))?;
112
-
113
- let searcher = reader.searcher();
114
-
115
- // Execute search with pagination
116
- let collector = TopDocs::with_limit(limit + offset);
117
-
118
- let top_docs = searcher.search(&combined_query, &collector)
119
- .map_err(|e| SearchError::Query(e.to_string()))?;
120
-
121
- // Collect results
122
- let mut entries: Vec<SearchResultEntry> = Vec::with_capacity(limit);
123
- let mut total_count = 0;
124
-
125
- for (score, doc_address) in top_docs {
126
- total_count += 1;
127
-
128
- // Skip offset results
129
- if total_count <= offset {
130
- continue;
131
- }
132
-
133
- // Get document
134
- if let Ok(doc) = searcher.doc(doc_address) {
135
- if let Some(entry) = doc_to_entry(&doc, schema, &fields, score as f64) {
136
- entries.push(entry);
137
- }
138
- }
139
-
140
- // Stop after limit
141
- if entries.len() >= limit {
142
- break;
143
- }
144
- }
145
-
146
- // Calculate index size
147
- let index_size_bytes = get_index_size(index);
148
-
149
- Ok(SearchResults {
150
- entries,
151
- total: total_count,
152
- query: query.to_string(),
153
- took_ms: start.elapsed().as_millis() as u64,
154
- index_size_bytes,
155
- })
156
- }
157
-
158
- /// Execute an advanced search with filters
159
- pub fn execute_advanced_search(
160
- index: &Index,
161
- schema: &Schema,
162
- options: AdvancedSearchOptions,
163
- ) -> Result<SearchResults, SearchError> {
164
- let start = std::time::Instant::now();
165
- let fields = HistoryFields::new(schema);
166
-
167
- let limit = options.limit.unwrap_or(10);
168
- let offset = options.offset.unwrap_or(0);
169
-
170
- let mut clauses: Vec<(tantivy::query::Occur, Box<dyn tantivy::query::Query>)> = Vec::new();
171
-
172
- // Add text query if provided
173
- if let Some(ref query) = options.query {
174
- let trimmed = query.trim();
175
- if !trimmed.is_empty() {
176
- let terms: Vec<&str> = trimmed.split_whitespace().collect();
177
-
178
- if !terms.is_empty() {
179
- let mut text_clauses: Vec<(tantivy::query::Occur, Box<dyn tantivy::query::Query>)> = Vec::new();
180
-
181
- for term in terms {
182
- let term_query = Box::new(TermQuery::new(
183
- Term::from_field_text(fields.all_text, term),
184
- IndexRecordOption::Basic,
185
- ));
186
- text_clauses.push((tantivy::query::Occur::Should, term_query));
187
- }
188
-
189
- let text_query: Box<dyn tantivy::query::Query> = Box::new(BooleanQuery::new(text_clauses));
190
- clauses.push((tantivy::query::Occur::Must, text_query));
191
- }
192
- }
193
- }
194
-
195
- // Add session filter
196
- if let Some(ref session_id) = options.session_id {
197
- let session_query = Box::new(TermQuery::new(
198
- Term::from_field_text(fields.session_id, session_id),
199
- IndexRecordOption::Basic,
200
- ));
201
- clauses.push((tantivy::query::Occur::Must, session_query));
202
- }
203
-
204
- // Add project path filter
205
- if let Some(ref project_path) = options.project_path {
206
- let project_query = Box::new(TermQuery::new(
207
- Term::from_field_text(fields.project_path, project_path),
208
- IndexRecordOption::Basic,
209
- ));
210
- clauses.push((tantivy::query::Occur::Must, project_query));
211
- }
212
-
213
- // Add message type filter
214
- if let Some(ref message_types) = options.message_types {
215
- if !message_types.is_empty() {
216
- let mut type_clauses: Vec<(tantivy::query::Occur, Box<dyn tantivy::query::Query>)> = Vec::new();
217
-
218
- for msg_type in message_types {
219
- let type_query = Box::new(TermQuery::new(
220
- Term::from_field_text(fields.message_type, msg_type),
221
- IndexRecordOption::Basic,
222
- ));
223
- type_clauses.push((tantivy::query::Occur::Should, type_query));
224
- }
225
-
226
- let types_query: Box<dyn tantivy::query::Query> = Box::new(BooleanQuery::new(type_clauses));
227
- clauses.push((tantivy::query::Occur::Must, types_query));
228
- }
229
- }
230
-
231
- // Add date range filter
232
- if options.start_date.is_some() || options.end_date.is_some() {
233
- let start_ts = options.start_date.as_ref().map(|d| timestamp_to_ms(d)).unwrap_or(0);
234
- let end_ts = options.end_date.as_ref().map(|d| timestamp_to_ms(d)).unwrap_or(i64::MAX);
235
-
236
- let range_query = Box::new(RangeQuery::new_i64_bounds(
237
- "timestamp_ms".to_string(),
238
- std::ops::Bound::Included(start_ts),
239
- std::ops::Bound::Included(end_ts),
240
- ));
241
- clauses.push((tantivy::query::Occur::Must, range_query));
242
- }
243
-
244
- // Build final query
245
- let final_query: Box<dyn tantivy::query::Query> = if clauses.is_empty() {
246
- Box::new(AllQuery)
247
- } else {
248
- Box::new(BooleanQuery::new(clauses))
249
- };
250
-
251
- // Create reader and searcher
252
- let reader = index.reader()
253
- .map_err(|e| SearchError::Query(e.to_string()))?;
254
-
255
- let searcher = reader.searcher();
256
-
257
- // Execute search
258
- let collector = TopDocs::with_limit(limit + offset);
259
-
260
- let top_docs = searcher.search(&final_query, &collector)
261
- .map_err(|e| SearchError::Query(e.to_string()))?;
262
-
263
- // Filter by min_score if specified
264
- let min_score = options.min_score.unwrap_or(0.0);
265
-
266
- // Collect results
267
- let mut entries: Vec<SearchResultEntry> = Vec::with_capacity(limit);
268
- let mut total_count = 0;
269
-
270
- for (score, doc_address) in top_docs {
271
- // Filter by minimum score
272
- if (score as f64) < min_score {
273
- continue;
274
- }
275
-
276
- total_count += 1;
277
-
278
- // Skip offset results
279
- if total_count <= offset {
280
- continue;
281
- }
282
-
283
- // Get document
284
- if let Ok(doc) = searcher.doc(doc_address) {
285
- if let Some(entry) = doc_to_entry(&doc, schema, &fields, score as f64) {
286
- entries.push(entry);
287
- }
288
- }
289
-
290
- // Stop after limit
291
- if entries.len() >= limit {
292
- break;
293
- }
294
- }
295
-
296
- // Calculate index size
297
- let index_size_bytes = get_index_size(index);
298
-
299
- Ok(SearchResults {
300
- entries,
301
- total: total_count,
302
- query: options.query.unwrap_or_default(),
303
- took_ms: start.elapsed().as_millis() as u64,
304
- index_size_bytes,
305
- })
306
- }
307
-
308
- /// Convert a Tantivy document to a SearchResultEntry
309
- fn doc_to_entry(
310
- doc: &tantivy::TantivyDocument,
311
- _schema: &Schema,
312
- fields: &HistoryFields,
313
- score: f64,
314
- ) -> Option<SearchResultEntry> {
315
- // Extract required fields
316
- let uuid = doc.get_first(fields.uuid)
317
- .and_then(|v| v.as_str().map(|s| s.to_string()))?;
318
-
319
- let session_id = doc.get_first(fields.session_id)
320
- .and_then(|v| v.as_str().map(|s| s.to_string()))?;
321
-
322
- let project_path = doc.get_first(fields.project_path)
323
- .and_then(|v| v.as_str().map(|s| s.to_string()))?;
324
-
325
- let content = doc.get_first(fields.content)
326
- .and_then(|v| v.as_str().map(|s| s.to_string()))?;
327
-
328
- let message_type_str = doc.get_first(fields.message_type)
329
- .and_then(|v| v.as_str().map(|s| s.to_string()))
330
- .unwrap_or_else(|| "user".to_string());
331
-
332
- let entry_type = MessageType::from(message_type_str.as_str());
333
-
334
- // Extract timestamp - convert back to ISO string
335
- let timestamp_ms = doc.get_first(fields.timestamp_ms)
336
- .and_then(|v| v.as_i64())
337
- .unwrap_or(0);
338
-
339
- let timestamp = if timestamp_ms > 0 {
340
- chrono::DateTime::from_timestamp_millis(timestamp_ms)
341
- .map(|dt| dt.to_rfc3339())
342
- .unwrap_or_else(|| "".to_string())
343
- } else {
344
- "".to_string()
345
- };
346
-
347
- // Build metadata from optional fields
348
- let metadata = {
349
- let model = doc.get_first(fields.model)
350
- .and_then(|v| v.as_str().map(|s| s.to_string()));
351
-
352
- let request_id = doc.get_first(fields.request_id)
353
- .and_then(|v| v.as_str().map(|s| s.to_string()));
354
-
355
- let cost_usd = doc.get_first(fields.cost_usd)
356
- .and_then(|v| v.as_f64());
357
-
358
- let duration_ms = doc.get_first(fields.duration_ms)
359
- .and_then(|v| v.as_i64())
360
- .map(|v| v as u64);
361
-
362
- let is_error = doc.get_first(fields.is_error)
363
- .and_then(|v| v.as_bool());
364
-
365
- if model.is_some() || request_id.is_some() || cost_usd.is_some() || duration_ms.is_some() || is_error.is_some() {
366
- Some(crate::types::EntryMetadata {
367
- usage: None,
368
- total_cost_usd: cost_usd,
369
- num_turns: None,
370
- duration_ms,
371
- is_error,
372
- error_type: None,
373
- model,
374
- request_id,
375
- })
376
- } else {
377
- None
378
- }
379
- };
380
-
381
- // Build conversation entry
382
- let entry = ConversationEntry {
383
- uuid,
384
- session_id,
385
- timestamp,
386
- project_path: project_path.clone(),
387
- entry_type,
388
- content,
389
- formatted_time: None,
390
- time_ago: None,
391
- local_date: None,
392
- metadata,
393
- project_name: Some(extract_project_name(&project_path)),
394
- };
395
-
396
- Some(SearchResultEntry {
397
- entry,
398
- score,
399
- highlights: Vec::new(), // TODO: Add highlighting support
400
- })
401
- }
402
-
403
- /// Get the size of the index directory in bytes
404
- /// Note: In Tantivy 0.22, ManagedDirectory doesn't expose path()
405
- /// This function returns 0 as a placeholder - index size can be tracked separately
406
- fn get_index_size(_index: &Index) -> u64 {
407
- // Tantivy 0.22 doesn't expose directory path through ManagedDirectory
408
- // Index size tracking would need to be done at a higher level
409
- 0
410
- }
411
-
412
#[cfg(test)]
mod tests {
    use super::*;
    use super::super::create_history_schema;

    /// Builds a query against a fresh history schema and returns its
    /// `Debug` rendering.
    fn render(query: &str, options: &IndexedSearchOptions) -> String {
        let schema = create_history_schema();
        let fields = HistoryFields::new(&schema);
        let built = build_combined_query(&schema, &fields, query, options);
        format!("{:?}", built)
    }

    #[test]
    fn test_build_combined_query_empty() {
        let rendered = render("", &IndexedSearchOptions::default());

        // No text and no filters: expect the match-all query.
        assert!(rendered.contains("AllQuery"));
    }

    #[test]
    fn test_build_combined_query_with_text() {
        let rendered = render("hello world", &IndexedSearchOptions::default());

        // Text terms should show up as a boolean/term query.
        assert!(rendered.contains("Boolean") || rendered.contains("Term"));
    }

    #[test]
    fn test_build_combined_query_with_filters() {
        let filtered = IndexedSearchOptions {
            session_id: Some("test-session".to_string()),
            project_path: Some("/test/project".to_string()),
            ..Default::default()
        };

        let rendered = render("test", &filtered);

        // Filters are expressed as boolean/term clauses too.
        assert!(rendered.contains("Boolean") || rendered.contains("Term"));
    }
}
@@ -1,115 +0,0 @@
1
- //! Schema definitions for conversation history search index
2
-
3
- use tantivy::schema::*;
4
-
5
- /// Create the schema for conversation history search
6
- pub fn create_history_schema() -> Schema {
7
- let mut schema_builder = Schema::builder();
8
-
9
- // Primary identifiers
10
- schema_builder.add_text_field("uuid", STRING | STORED);
11
- schema_builder.add_text_field("session_id", STRING | STORED | FAST);
12
- schema_builder.add_text_field("project_path", STRING | STORED | FAST);
13
-
14
- // Message type and role
15
- schema_builder.add_text_field("message_type", STRING | STORED | FAST);
16
- schema_builder.add_text_field("role", STRING | STORED | FAST);
17
-
18
- // Timestamp for range queries
19
- schema_builder.add_i64_field("timestamp_ms", STORED | FAST | INDEXED);
20
-
21
- // Content fields
22
- schema_builder.add_text_field("content", TEXT | STORED);
23
- schema_builder.add_text_field("all_text", TEXT); // Combined searchable field
24
-
25
- // Metadata
26
- schema_builder.add_text_field("model", STRING | STORED);
27
- schema_builder.add_text_field("request_id", STRING | STORED | FAST);
28
- schema_builder.add_f64_field("cost_usd", STORED | FAST);
29
- schema_builder.add_i64_field("duration_ms", STORED | FAST | INDEXED);
30
- schema_builder.add_bool_field("is_error", STORED | FAST);
31
-
32
- // Project name for faceted search
33
- schema_builder.add_text_field("project_name", STRING | STORED | FAST);
34
-
35
- schema_builder.build()
36
- }
37
-
38
- /// Field accessors for the history schema
39
- pub struct HistoryFields {
40
- pub uuid: Field,
41
- pub session_id: Field,
42
- pub project_path: Field,
43
- pub message_type: Field,
44
- pub role: Field,
45
- pub timestamp_ms: Field,
46
- pub content: Field,
47
- pub all_text: Field,
48
- pub model: Field,
49
- pub request_id: Field,
50
- pub cost_usd: Field,
51
- pub duration_ms: Field,
52
- pub is_error: Field,
53
- pub project_name: Field,
54
- }
55
-
56
- impl HistoryFields {
57
- /// Create field accessors from a schema
58
- pub fn new(schema: &Schema) -> Self {
59
- Self {
60
- uuid: schema.get_field("uuid").expect("uuid field missing"),
61
- session_id: schema.get_field("session_id").expect("session_id field missing"),
62
- project_path: schema.get_field("project_path").expect("project_path field missing"),
63
- message_type: schema.get_field("message_type").expect("message_type field missing"),
64
- role: schema.get_field("role").expect("role field missing"),
65
- timestamp_ms: schema.get_field("timestamp_ms").expect("timestamp_ms field missing"),
66
- content: schema.get_field("content").expect("content field missing"),
67
- all_text: schema.get_field("all_text").expect("all_text field missing"),
68
- model: schema.get_field("model").expect("model field missing"),
69
- request_id: schema.get_field("request_id").expect("request_id field missing"),
70
- cost_usd: schema.get_field("cost_usd").expect("cost_usd field missing"),
71
- duration_ms: schema.get_field("duration_ms").expect("duration_ms field missing"),
72
- is_error: schema.get_field("is_error").expect("is_error field missing"),
73
- project_name: schema.get_field("project_name").expect("project_name field missing"),
74
- }
75
- }
76
- }
77
-
78
- /// Convert timestamp string to milliseconds since epoch
79
- pub fn timestamp_to_ms(ts: &str) -> i64 {
80
- chrono::DateTime::parse_from_rfc3339(ts)
81
- .map(|dt| dt.timestamp_millis())
82
- .unwrap_or(0)
83
- }
84
-
85
/// Extract the project name (the last `/`-separated component) from a path.
///
/// Trailing slashes are ignored, so `"/home/me/app/"` yields `"app"` rather
/// than an empty name. If no non-empty component exists (the input is empty
/// or consists only of slashes), the input is returned unchanged.
pub fn extract_project_name(project_path: &str) -> String {
    let trimmed = project_path.trim_end_matches('/');

    // `rsplit` reaches the last component without walking the whole path.
    match trimmed.rsplit('/').next() {
        Some(name) if !name.is_empty() => name.to_string(),
        _ => project_path.to_string(),
    }
}
93
-
94
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_schema_creation() {
        let schema = create_history_schema();

        // A few representative fields must be resolvable by name.
        for name in ["uuid", "content", "session_id"] {
            assert!(schema.get_field(name).is_ok());
        }
    }

    #[test]
    fn test_fields_access() {
        let schema = create_history_schema();
        let fields = HistoryFields::new(&schema);

        // Handles must map back to the names they were resolved from.
        assert_eq!(schema.get_field_name(fields.uuid), "uuid");
        assert_eq!(schema.get_field_name(fields.content), "content");
    }
}