footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Semantic search module for Footprinter."""
|
|
2
|
+
|
|
3
|
+
# Both submodules are imported on a best-effort basis: when an import fails
# (for example because an optional dependency is missing) the corresponding
# name is simply left undefined in this package.
try:
    from .embeddings import get_embedding_function
except ImportError:
    pass

try:
    from .vector_store import VectorStore
except ImportError:
    pass

# Only advertise names that were actually imported. Listing a missing name in
# ``__all__`` makes ``from footprinter.semantic import *`` raise
# AttributeError, which defeats the purpose of the optional imports above.
__all__ = [name for name in ("VectorStore", "get_embedding_function") if name in globals()]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Pure chunking function for splitting content into overlapping chunks."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Tuple
|
|
4
|
+
|
|
5
|
+
DEFAULT_CHUNK_SIZE = 1000  # chars — tuned for MiniLM-L6-v2 (256-token window)
DEFAULT_CHUNK_OVERLAP = 150  # chars (15% of default chunk size)

# How far back from the hard cut position a space is searched for.
_WORD_BOUNDARY_LOOKBACK = 200


def chunk_content(
    content: str,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
    chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
) -> List[Tuple[str, int, int]]:
    """
    Split content into overlapping chunks with word-boundary awareness.

    Content that already fits in ``chunk_size`` is returned unchanged as a
    single chunk. Otherwise each chunk is cut at ``chunk_size`` characters,
    moved back to the last space found within the final 200 characters of
    the chunk when there is one, and stripped of surrounding whitespace.
    Chunks that are empty after stripping are dropped.

    The next chunk normally starts ``chunk_overlap`` characters before the
    end of the previous one. When that would not move the cursor forward
    (overlap too large for the chunk that was actually cut), the overlap is
    skipped for that step so the function always terminates.

    Args:
        content: Text to split.
        chunk_size: Maximum characters per chunk.
        chunk_overlap: Character overlap between consecutive chunks.

    Returns:
        List of (chunk_text, chunk_index, total_chunks) tuples.

    Raises:
        ValueError: If ``chunk_size`` is not positive and the content does
            not fit in a single chunk.
    """
    content_len = len(content)
    if content_len <= chunk_size:
        return [(content, 0, 1)]

    if chunk_size <= 0:
        # A non-positive size can never consume the content.
        raise ValueError("chunk_size must be a positive integer")

    pieces = []
    start = 0

    while start < content_len:
        end = start + chunk_size

        # Try to break at a word boundary, but never look before the start
        # of the current chunk (and never pass a negative index to rfind,
        # which would be interpreted relative to the end of the string).
        if end < content_len:
            window_start = max(start, end - _WORD_BOUNDARY_LOOKBACK)
            space_pos = content.rfind(" ", window_start, end)
            if space_pos > start:
                end = space_pos

        chunk_text = content[start:end].strip()
        if chunk_text:
            pieces.append(chunk_text)

        if end >= content_len:
            break

        # Step back by the overlap, unless that would stall or move backwards.
        next_start = end - chunk_overlap
        start = next_start if next_start > start else end

    total = len(pieces)
    return [(text, index, total) for index, text in enumerate(pieces)]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Shared embedding function using ChromaDB's built-in ONNX backend."""
|
|
2
|
+
|
|
3
|
+
# chromadb is imported on a best-effort basis; the flag records whether the
# import worked so the error is raised only when embeddings are requested.
try:
    from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2
except ImportError:
    _SEMANTIC_AVAILABLE = False
else:
    _SEMANTIC_AVAILABLE = True

EMBEDDING_DIM = 384


def get_embedding_function():
    """Build and return a new ``ONNXMiniLM_L6_V2`` embedding function.

    The returned object is intended to be used as a callable:
    ef(texts: list[str]) -> list[list[float]]

    Raises:
        ImportError: If chromadb could not be imported.
    """
    if _SEMANTIC_AVAILABLE:
        return ONNXMiniLM_L6_V2()
    raise ImportError("chromadb is required for embeddings. Install with: pip install footprinter-cli[semantic]")
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""Hybrid search functions: FTS5 keyword search, snippet extraction, and RRF fusion."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sqlite3
|
|
5
|
+
from typing import Dict, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def extract_snippet(content: str, query: str, window: int = 250) -> str:
    """
    Cut a snippet out of content around the first matching query term.

    Content no longer than ``2 * window`` characters is returned as is.
    Otherwise the query is lower-cased and split on whitespace; the first
    term of at least three characters that occurs in the lower-cased content
    anchors the snippet (position 0 is used when nothing matches). The
    snippet starts ``window // 2`` characters before the anchor, ends
    ``window + window // 2`` characters after it, and both edges are then
    moved to a nearby space where one is found.

    Args:
        content: Full message content.
        query: Search query.
        window: Character window around match.

    Returns:
        The stripped snippet, with "..." added on each side that was cut.
    """
    total = len(content)
    if total <= window * 2:
        return content

    haystack = content.lower()
    candidate_terms = (term for term in query.lower().split() if len(term) >= 3)
    # Terms are tried in query order; the first one that is found wins.
    anchor = next((hit for hit in map(haystack.find, candidate_terms) if hit >= 0), 0)

    half = window // 2
    left = max(0, anchor - half)
    right = min(total, anchor + window + half)

    # Snap the left edge to just after the last space before left + 20.
    if left > 0:
        boundary = content.rfind(" ", 0, left + 20)
        if boundary > 0:
            left = boundary + 1

    # Snap the right edge to the first space at or after right - 20.
    if right < total:
        boundary = content.find(" ", right - 20)
        if boundary > 0:
            right = boundary

    prefix = "..." if left > 0 else ""
    suffix = "..." if right < total else ""
    return prefix + content[left:right].strip() + suffix
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def chat_snippet(row: Dict) -> str:
    """Return display text for a keyword_search result row.

    A non-empty "summary" is used, cut to 300 characters plus "..." when it
    is longer than that. Without a summary the row's "chat_title" is shown
    behind a "Title match: " prefix.
    """
    summary = row.get("summary")
    if not summary:
        return f"Title match: {row['chat_title']}"
    return summary if len(summary) <= 300 else summary[:300] + "..."
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def reciprocal_rank_fusion(semantic_results: List[Dict], keyword_results: List[Dict], k: int = 60) -> List[Dict]:
    """
    Combine semantic and keyword results using Reciprocal Rank Fusion.

    RRF score = sum(1 / (k + rank)) over every appearance of a chat in
    either result list, with ranks starting at 1.
    Higher k reduces the impact of high-ranking items.

    Results are grouped by "chat_id". For a chat that appears several times
    in one list, every appearance adds to the score, but the stored data and
    the reported rank come from its first (best-ranked) appearance.

    The input dictionaries are not modified; the returned entries are copies
    extended with "semantic_rank", "keyword_rank", "rrf_score" and
    "relevance_score".

    Args:
        semantic_results: Results from semantic search. Each needs "chat_id".
        keyword_results: Results from FTS5 keyword search. Each needs
            "chat_id"; keyword-only hits also need "chat_title" and
            "created_at".
        k: RRF constant (default 60, standard value).

    Returns:
        Combined results, sorted by descending RRF score.
    """
    rrf_scores: Dict = {}
    result_data: Dict = {}

    # Process semantic results
    for rank, result in enumerate(semantic_results, start=1):
        chat_id = result["chat_id"]
        rrf_scores[chat_id] = rrf_scores.get(chat_id, 0) + 1.0 / (k + rank)
        if chat_id not in result_data:
            # Copy so the caller's dictionaries are left untouched.
            result_data[chat_id] = {**result, "semantic_rank": rank, "keyword_rank": None}

    # Process keyword results
    for rank, result in enumerate(keyword_results, start=1):
        chat_id = result["chat_id"]
        rrf_scores[chat_id] = rrf_scores.get(chat_id, 0) + 1.0 / (k + rank)

        entry = result_data.get(chat_id)
        if entry is None:
            # Keyword-only result — use "source" if present, fall back to "account"
            source = result.get("source", result.get("account", "unknown"))
            result_data[chat_id] = {
                "chat_id": chat_id,
                "chat_title": result["chat_title"],
                "message_id": None,
                "role": "keyword",
                "source": source,
                "created_at": result["created_at"],
                "snippet": chat_snippet(result),
                "relevance_score": 0,
                "chunk_type": "keyword_match",
                "chunk_index": 0,
                "total_chunks": 1,
                "semantic_rank": None,
                "keyword_rank": rank,
            }
        elif entry["keyword_rank"] is None:
            # Keep the best keyword rank if the chat shows up again later.
            entry["keyword_rank"] = rank

    # Sort by RRF score (stable for ties) and update relevance_score
    combined = []
    for chat_id in sorted(rrf_scores, key=rrf_scores.get, reverse=True):
        entry = result_data[chat_id]
        score = rrf_scores[chat_id]
        entry["rrf_score"] = round(score, 4)
        # Use RRF score as the display relevance (scaled for readability)
        entry["relevance_score"] = round(min(1.0, score * 30), 3)
        combined.append(entry)

    return combined
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def keyword_search(
    query: str,
    db_path: str,
    account: Optional[str] = None,
    limit: int = 50,
) -> List[Dict]:
    """
    FTS5 keyword search on chat titles and summaries.

    The whole query is matched as one quoted FTS5 phrase with a trailing
    prefix wildcard. Rows whose status is 'removed' are excluded. Errors
    raised while running the search are logged and yield an empty list; the
    database connection is closed in every case.

    Args:
        query: Search query. A blank query returns no results.
        db_path: Path to SQLite database.
        account: Filter by account ('claude', 'chatgpt').
        limit: Maximum results.

    Returns:
        List of chat matches with FTS5 rank scores.
        Uses 'source' key (not 'account') for consistency with semantic results.
    """
    if not query or not query.strip():
        # Nothing to match; skip opening the database altogether.
        return []

    # Escape embedded double quotes, then wrap as a phrase prefix query.
    safe_query = query.replace('"', '""')
    fts_query = f'"{safe_query}"*'

    # Only a fixed SQL fragment is interpolated; all values are bound parameters.
    account_clause = "AND chat.account = ?" if account else ""
    sql = f"""
        SELECT chat.id, chat.title, chat.summary, chat.account, chat.created_at, chat.message_count,
               fts.rank as fts_rank
        FROM chats_fts fts
        JOIN chats chat ON chat.id = fts.rowid
        WHERE chats_fts MATCH ?
          {account_clause}
          AND chat.status != 'removed'
        ORDER BY fts.rank
        LIMIT ?
    """
    params = (fts_query, account, limit) if account else (fts_query, limit)

    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA busy_timeout=5000")
        conn.execute("PRAGMA foreign_keys=ON")

        try:
            rows = conn.execute(sql, params).fetchall()
            results = [
                {
                    "chat_id": row["id"],
                    "chat_title": row["title"] or "(untitled)",
                    "source": row["account"] or "unknown",
                    "created_at": row["created_at"] or "",
                    "message_count": row["message_count"] or 0,
                    "summary": row["summary"] or "",
                    # FTS5 rank is negative (more negative = better match)
                    "fts_score": min(1.0, abs(row["fts_rank"]) / 10.0),
                    "match_type": "keyword",
                }
                for row in rows
            ]
        except Exception as e:
            # Best-effort search: report the failure and return nothing.
            logger.error("FTS5 search error: %s", e)
            results = []
    finally:
        # Release the connection even if a PRAGMA above raised.
        conn.close()

    return results
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def fts5_fallback_search(
    query: str,
    n_results: int = 20,
    source: Optional[str] = None,
    db_path: Optional[str] = None,
) -> Tuple[List[Dict], bool]:
    """
    Keyword-only search used when ML dependencies are unavailable.

    Runs keyword_search and reshapes every hit into the dictionary layout
    of hybrid search results, so consumers don't need to branch on search
    mode.

    Args:
        query: Search query.
        n_results: Max results.
        source: Filter by source/account.
        db_path: Path to SQLite database; taken from
            footprinter.paths.get_db_path() when None.

    Returns:
        (results, True) — True indicates fallback mode.
    """
    if db_path is None:
        from footprinter.paths import get_db_path

        db_path = str(get_db_path())

    hits = keyword_search(query, db_path=db_path, account=source, limit=n_results)

    # Normalize to match hybrid search result shape
    normalized = [
        {
            "chat_id": hit["chat_id"],
            "chat_title": hit["chat_title"],
            "message_id": None,
            "role": "keyword",
            "source": hit["source"],
            "created_at": hit["created_at"],
            "snippet": chat_snippet(hit),
            "relevance_score": round(hit["fts_score"], 3),
            "chunk_type": "keyword_match",
            "chunk_index": 0,
            "total_chunks": 1,
        }
        for hit in hits
    ]
    return normalized, True
|