footprinter-cli 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +431 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
- footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
- footprinter/bundled/samples/visible-file-sample.txt +2 -0
- footprinter/cli/__init__.py +135 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +327 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/_sample_seed.py +204 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +543 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +2001 -0
- footprinter/cli/status.py +747 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +602 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +724 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +487 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +315 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +223 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +112 -0
- footprinter/ingest/pipe_runner.py +200 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +186 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +67 -0
- footprinter/mcp/errors.py +105 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +19 -0
- footprinter/paths.py +117 -0
- footprinter/permissions.py +1152 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1264 -0
- footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
- footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
- footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"""Entity read endpoints for Footprinter HTTP API."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
6
|
+
|
|
7
|
+
from footprinter.api.db import get_conn
|
|
8
|
+
from footprinter.services import (
|
|
9
|
+
chat_service,
|
|
10
|
+
client_service,
|
|
11
|
+
email_service,
|
|
12
|
+
file_service,
|
|
13
|
+
folder_service,
|
|
14
|
+
project_service,
|
|
15
|
+
visit_service,
|
|
16
|
+
)
|
|
17
|
+
from footprinter.services.roles import Role
|
|
18
|
+
|
|
19
|
+
router = APIRouter(tags=["entities"])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _or_404(result, entity_type: str, entity_id: int):
    """Pass ``result`` through, or raise HTTP 404 when it is ``None``.

    Args:
        result: Value returned by a service lookup; ``None`` means "missing".
        entity_type: Label used in the error detail (e.g. ``"file"``).
        entity_id: Identifier echoed back in the error detail.

    Returns:
        ``result`` unchanged when it is not ``None``.

    Raises:
        HTTPException: With status 404 when ``result`` is ``None``.
    """
    if result is not None:
        return result
    raise HTTPException(status_code=404, detail=f"{entity_type} {entity_id} not found")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# --- Files ---
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@router.get("/files")
def list_files(
    conn=Depends(get_conn),
    project_id: Optional[int] = None,
    source: Optional[str] = Query(None, description="Comma-separated source filter"),
    status: Optional[str] = Query(None, description="Comma-separated status filter"),
    content_type: Optional[str] = None,
    limit: int = 50,
    page: int = 1,
):
    # Comma-separated query values become lists of trimmed tokens; an absent
    # or empty parameter is forwarded as None.
    wanted_sources = None
    if source:
        wanted_sources = [token.strip() for token in source.split(",")]

    wanted_statuses = None
    if status:
        wanted_statuses = [token.strip() for token in status.split(",")]

    filters = {
        "project_id": project_id,
        "source": wanted_sources,
        "status": wanted_statuses,
        "content_type": content_type,
        "limit": limit,
        "page": page,
    }
    # The HTTP API always calls the service layer with the ADMIN role.
    return file_service.list_(conn, role=Role.ADMIN, **filters)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@router.get("/files/{file_id}")
def get_file(file_id: int, conn=Depends(get_conn)):
    # Fetch one file with the ADMIN role; a None result is turned into a 404.
    record = file_service.get(conn, file_id, role=Role.ADMIN)
    return _or_404(record, "file", file_id)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# --- Emails ---
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@router.get("/emails")
def list_emails(
    conn=Depends(get_conn),
    account: Optional[str] = None,
    client_id: Optional[int] = None,
    project_id: Optional[int] = None,
    query: Optional[str] = None,
    has_attachments: Optional[bool] = None,
    sort_by: str = "received_at",
    order: str = "desc",
    limit: int = 50,
    page: int = 1,
):
    # Every query parameter is handed to the email service unchanged.
    criteria = {
        "account": account,
        "client_id": client_id,
        "project_id": project_id,
        "query": query,
        "has_attachments": has_attachments,
        "sort_by": sort_by,
        "order": order,
        "limit": limit,
        "page": page,
    }
    return email_service.list_(conn, role=Role.ADMIN, **criteria)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@router.get("/emails/{email_id}")
def get_email(email_id: int, conn=Depends(get_conn)):
    # Fetch one email with the ADMIN role; a None result is turned into a 404.
    record = email_service.get(conn, email_id, role=Role.ADMIN)
    return _or_404(record, "email", email_id)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# --- Chats ---
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@router.get("/chats")
def list_chats(
    conn=Depends(get_conn),
    account: Optional[str] = None,
    query: Optional[str] = None,
    sort_by: str = "modified_at",
    order: str = "desc",
    status: Optional[str] = Query(None, description="Comma-separated status filter"),
    limit: int = 50,
    page: int = 1,
):
    # Split the comma-separated status filter into trimmed tokens; an absent
    # or empty parameter is forwarded as None.
    wanted_statuses = None
    if status:
        wanted_statuses = [token.strip() for token in status.split(",")]

    criteria = {
        "account": account,
        "query": query,
        "sort_by": sort_by,
        "order": order,
        "status": wanted_statuses,
        "limit": limit,
        "page": page,
    }
    return chat_service.list_(conn, role=Role.ADMIN, **criteria)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@router.get("/chats/{chat_id}")
def get_chat(chat_id: int, conn=Depends(get_conn)):
    # Fetch one chat with the ADMIN role; a None result is turned into a 404.
    record = chat_service.get(conn, chat_id, role=Role.ADMIN)
    return _or_404(record, "chat", chat_id)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# --- Projects ---
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@router.get("/projects")
def list_projects(
    conn=Depends(get_conn),
    include: Optional[str] = Query(None, description="Comma-separated includes"),
    status: Optional[str] = Query(None, description="Comma-separated status filter"),
    client: Optional[str] = None,
    project_type: Optional[str] = None,
    limit: int = 50,
    page: int = 1,
):
    # Comma-separated query values become lists of trimmed tokens; an absent
    # or empty parameter is forwarded as None.
    wanted_includes = None
    if include:
        wanted_includes = [token.strip() for token in include.split(",")]

    wanted_statuses = None
    if status:
        wanted_statuses = [token.strip() for token in status.split(",")]

    criteria = {
        "include": wanted_includes,
        "status": wanted_statuses,
        "client": client,
        "project_type": project_type,
        "limit": limit,
        "page": page,
    }
    return project_service.list_(conn, role=Role.ADMIN, **criteria)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@router.get("/projects/{project_id}")
def get_project(project_id: int, conn=Depends(get_conn), include: Optional[str] = None):
    # ``include`` is a comma-separated list; None is forwarded when it is
    # absent or empty.
    wanted_includes = None
    if include:
        wanted_includes = [token.strip() for token in include.split(",")]

    record = project_service.get(conn, project_id, role=Role.ADMIN, include=wanted_includes)
    # A None result from the service is turned into a 404.
    return _or_404(record, "project", project_id)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# --- Clients ---
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@router.get("/clients")
def list_clients(
    conn=Depends(get_conn),
    include: Optional[str] = Query(None, description="Comma-separated includes"),
    status: Optional[str] = Query(None, description="Comma-separated status filter"),
    limit: int = 50,
    page: int = 1,
):
    # Comma-separated query values become lists of trimmed tokens; an absent
    # or empty parameter is forwarded as None.
    wanted_includes = None
    if include:
        wanted_includes = [token.strip() for token in include.split(",")]

    wanted_statuses = None
    if status:
        wanted_statuses = [token.strip() for token in status.split(",")]

    criteria = {
        "include": wanted_includes,
        "status": wanted_statuses,
        "limit": limit,
        "page": page,
    }
    return client_service.list_(conn, role=Role.ADMIN, **criteria)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@router.get("/clients/{client_id}")
def get_client(client_id: int, conn=Depends(get_conn), include: Optional[str] = None):
    # ``include`` is a comma-separated list; None is forwarded when it is
    # absent or empty.
    wanted_includes = None
    if include:
        wanted_includes = [token.strip() for token in include.split(",")]

    record = client_service.get(conn, client_id, role=Role.ADMIN, include=wanted_includes)
    # A None result from the service is turned into a 404.
    return _or_404(record, "client", client_id)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# --- Folders ---
|
|
201
|
+
# NOTE: /folders/by-path MUST be defined before /folders/{folder_id}
|
|
202
|
+
# to avoid FastAPI treating "by-path" as an int parameter.
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
@router.get("/folders/by-path")
def get_folder_by_path(path: str, conn=Depends(get_conn)):
    # Look the folder up by its path rather than its id; the 404 detail
    # therefore quotes the path instead of going through _or_404.
    folder = folder_service.get_by_path(conn, path, role=Role.ADMIN)
    if folder is not None:
        return folder
    raise HTTPException(status_code=404, detail=f"folder at path '{path}' not found")
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@router.get("/folders")
def list_folders(
    conn=Depends(get_conn),
    project_id: Optional[int] = None,
    depth: Optional[int] = 1,
    include_hidden: bool = False,
    sort_by: str = "size",
    limit: int = 50,
    page: int = 1,
):
    # Every query parameter is handed to the folder service unchanged.
    criteria = {
        "project_id": project_id,
        "depth": depth,
        "include_hidden": include_hidden,
        "sort_by": sort_by,
        "limit": limit,
        "page": page,
    }
    return folder_service.list_(conn, role=Role.ADMIN, **criteria)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@router.get("/folders/{folder_id}")
def get_folder(folder_id: int, conn=Depends(get_conn)):
    # Fetch one folder with the ADMIN role; a None result is turned into a 404.
    record = folder_service.get(conn, folder_id, role=Role.ADMIN)
    return _or_404(record, "folder", folder_id)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# --- Visits ---
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
@router.get("/visits")
def list_visits(conn=Depends(get_conn), limit: int = 50, page: int = 1):
    # Only paging parameters are accepted; both are forwarded unchanged.
    paging = {"limit": limit, "page": page}
    return visit_service.list_(conn, role=Role.ADMIN, **paging)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
@router.get("/visits/{entry_id}")
def get_visit(entry_id: int, conn=Depends(get_conn)):
    # Fetch one visit with the ADMIN role; a None result is turned into a 404.
    record = visit_service.get(conn, entry_id, role=Role.ADMIN)
    return _or_404(record, "visit", entry_id)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Search endpoint for Footprinter HTTP API."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, Depends, Query
|
|
6
|
+
|
|
7
|
+
from footprinter.api.db import get_conn
|
|
8
|
+
from footprinter.services import search_service
|
|
9
|
+
from footprinter.services.roles import Role
|
|
10
|
+
|
|
11
|
+
router = APIRouter(tags=["search"])
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@router.get("/search")
def search(
    conn=Depends(get_conn),
    query: str = "",
    sources: Optional[str] = Query(None, description="Comma-separated source filter"),
    project: Optional[str] = None,
    client: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    limit: int = 50,
    account: Optional[str] = None,
    sender: Optional[str] = None,
    days_back: Optional[int] = None,
    folder: Optional[str] = None,
    mime_type: Optional[str] = None,
):
    """Keyword search across indexed content."""
    # Split the comma-separated source filter into trimmed tokens; an absent
    # or empty parameter is forwarded as None.
    wanted_sources = None
    if sources:
        wanted_sources = [token.strip() for token in sources.split(",")]

    criteria = {
        "query": query,
        "sources": wanted_sources,
        "project": project,
        "client": client,
        "date_from": date_from,
        "date_to": date_to,
        "limit": limit,
        "account": account,
        "sender": sender,
        "days_back": days_back,
        "folder": folder,
        "mime_type": mime_type,
    }
    return search_service.search(conn, role=Role.ADMIN, **criteria)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Semantic search endpoint for Footprinter HTTP API."""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
4
|
+
|
|
5
|
+
from footprinter.api.db import get_conn
|
|
6
|
+
from footprinter.services import semantic_service
|
|
7
|
+
from footprinter.services.roles import Role
|
|
8
|
+
|
|
9
|
+
router = APIRouter(tags=["semantic"])
|
|
10
|
+
|
|
11
|
+
_VALID_SOURCES = {"chats", "files", "all"}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@router.get("/semantic")
def semantic_search(
    conn=Depends(get_conn),
    query: str = Query(..., min_length=3, description="Search query (minimum 3 characters)"),
    source: str = Query("all", description="Source to search: chats, files, or all"),
    limit: int = 10,
):
    """Semantic (vector) search across indexed content."""
    # Happy path first: a recognised source goes straight to the service.
    if source in _VALID_SOURCES:
        return semantic_service.semantic_search(
            conn,
            query,
            role=Role.ADMIN,
            source=source,
            limit=limit,
        )

    # Anything else is rejected with 422 and the sorted list of valid values.
    raise HTTPException(
        status_code=422,
        detail=f"Invalid source '{source}'. Must be one of: {', '.join(sorted(_VALID_SOURCES))}",
    )
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Footprinter HTTP API — FastAPI app factory and server entry point."""
|
|
2
|
+
|
|
3
|
+
from fastapi import FastAPI, Request
|
|
4
|
+
from fastapi.responses import JSONResponse
|
|
5
|
+
|
|
6
|
+
from footprinter.api.db import DatabaseNotInitializedError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def create_app() -> "FastAPI":
    """Create and configure the FastAPI application.

    Registers a ``/health`` probe at the root, installs the handler that maps
    ``DatabaseNotInitializedError`` to HTTP 503, and mounts the status, search
    and entity routers under ``/api``. The semantic router is mounted only
    when ``footprinter.api.semantic`` can be imported (e.g. when the optional
    chromadb dependency is installed); otherwise the reason is logged and the
    app is returned without it.

    Returns:
        The configured ``FastAPI`` instance.
    """
    import logging

    from footprinter import __version__
    from footprinter.api.entities import router as entities_router
    from footprinter.api.search import router as search_router
    from footprinter.api.status import router as status_router

    app = FastAPI(
        title="Footprinter API",
        version=__version__,
        description="HTTP API for Footprinter — file archival and AI context system.",
    )

    # Health check (outside /api prefix)
    @app.get("/health")
    def health():
        return {"status": "ok"}

    # Exception handler for uninitialized DB
    @app.exception_handler(DatabaseNotInitializedError)
    async def db_not_initialized_handler(request: Request, exc: DatabaseNotInitializedError):
        return JSONResponse(
            status_code=503,
            content={
                "detail": "Database not initialized. Run 'fp ingest' to populate.",
            },
        )

    # Mount routers
    app.include_router(status_router, prefix="/api")
    app.include_router(search_router, prefix="/api")
    app.include_router(entities_router, prefix="/api")

    # Conditional semantic router. Only the import is guarded, so an error
    # raised while mounting is not mistaken for a missing optional dependency.
    try:
        from footprinter.api.semantic import router as semantic_router
    except ImportError as exc:
        # Still best-effort, but record why /api/semantic is absent instead of
        # failing silently.
        logging.getLogger(__name__).info(
            "Semantic search endpoints disabled: %s", exc
        )
    else:
        app.include_router(semantic_router, prefix="/api")

    return app
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def main(host: str = "127.0.0.1", port: int = 8000) -> None:
    """Build the application with ``create_app`` and serve it with uvicorn.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    import uvicorn

    uvicorn.run(create_app(), host=host, port=port)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Status endpoint for Footprinter HTTP API."""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends
|
|
4
|
+
|
|
5
|
+
from footprinter.api.db import get_conn
|
|
6
|
+
from footprinter.services import status_service
|
|
7
|
+
from footprinter.services.roles import Role
|
|
8
|
+
|
|
9
|
+
router = APIRouter(tags=["status"])
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@router.get("/status")
def get_status(conn=Depends(get_conn)):
    """Return system status and data counts."""
    # Delegates entirely to the status service, called with the ADMIN role.
    snapshot = status_service.get_status(conn, role=Role.ADMIN)
    return snapshot
|
|
File without changes
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# Footprinter Configuration (example)
|
|
2
|
+
# Copy to config.yaml and fill in your values.
|
|
3
|
+
|
|
4
|
+
# Directories to scan - only Work, Personal, and .claude
|
|
5
|
+
directories:
|
|
6
|
+
- "~/Work"
|
|
7
|
+
- "~/Personal"
|
|
8
|
+
- "~/.claude"
|
|
9
|
+
|
|
10
|
+
# Browser history sources
|
|
11
|
+
browsers:
|
|
12
|
+
- safari
|
|
13
|
+
- chrome
|
|
14
|
+
|
|
15
|
+
# Exclusion patterns (regex)
|
|
16
|
+
# Patterns starting with ^~/ expand ~ to your home directory at runtime.
|
|
17
|
+
#
|
|
18
|
+
# Files matching these patterns are NEVER scanned — they don't appear in the
|
|
19
|
+
# database at all. This is different from hidden files (dot-files/dot-directories),
|
|
20
|
+
# which ARE scanned and indexed with status='hidden' so they appear in the catalog
|
|
21
|
+
# but are excluded from Drive sync.
|
|
22
|
+
#
|
|
23
|
+
# Two tiers, both applied to all folders: "always" (regeneratable or noisy data) and "sensitive" (credentials and key stores).
|
|
24
|
+
exclusions:
|
|
25
|
+
# Always skip - applies to all folders
|
|
26
|
+
# Files matching these patterns are skipped during scanning and never enter the database.
|
|
27
|
+
# Use for regeneratable dependencies and system noise that has no archival value.
|
|
28
|
+
always:
|
|
29
|
+
# Regeneratable dependencies
|
|
30
|
+
- ".*/node_modules/.*" # npm dependencies
|
|
31
|
+
- ".*/__pycache__/.*" # Python bytecode
|
|
32
|
+
- ".*/venv/.*" # Python virtualenvs (non-hidden)
|
|
33
|
+
- ".*/\\.venv/.*" # Python virtualenvs (hidden)
|
|
34
|
+
- ".*/site-packages/.*" # Python packages
|
|
35
|
+
- ".*\\.pyc$" # Python compiled files
|
|
36
|
+
- ".*/\\.vscode/extensions/.*" # VS Code extensions (reinstallable)
|
|
37
|
+
- ".*/\\.npm/.*" # npm cache
|
|
38
|
+
- ".*/\\.nvm/.*" # Node version manager
|
|
39
|
+
- ".*/\\.cache/.*" # Generic caches
|
|
40
|
+
- ".*/\\.sf/.*" # Salesforce CLI
|
|
41
|
+
- ".*/\\.sfdx/.*" # Salesforce DX CLI
|
|
42
|
+
- ".*/\\.docker/.*" # Docker config
|
|
43
|
+
- ".*/\\.cumulusci/.*" # CumulusCI cache
|
|
44
|
+
# Home-level Claude dirs only (keep .claude within Work/Personal)
|
|
45
|
+
- "^~/\\.claude/.*" # Home-level .claude
|
|
46
|
+
- "^~/\\.claude-worktrees/.*" # Home-level .claude-worktrees
|
|
47
|
+
# Git internals (but NOT .gitignore, .gitattributes - those are kept)
|
|
48
|
+
- ".*/\\.git/.*" # Git internal directory contents
|
|
49
|
+
# System noise
|
|
50
|
+
- ".*\\.tmp$" # Temp files
|
|
51
|
+
- ".*/~\\$.*" # Office lock files (~$document.docx)
|
|
52
|
+
- ".*/\\.Trash/.*" # macOS Trash
|
|
53
|
+
- ".*/Library/.*" # macOS system directory
|
|
54
|
+
- ".*\\.DS_Store$" # macOS Finder metadata
|
|
55
|
+
# User directories not for archival
|
|
56
|
+
- "^~/Downloads/.*" # Transient downloads
|
|
57
|
+
- "^~/Music/Audio Music Apps/.*" # App databases
|
|
58
|
+
- "^~/Applications/.*" # User-installed apps
|
|
59
|
+
# Hidden app data (not in Work/Personal)
|
|
60
|
+
- "^~/\\.local/.*" # Local app data
|
|
61
|
+
- "^~/\\.cursor/.*" # Cursor IDE
|
|
62
|
+
- "^~/\\.codex/.*" # Codex CLI
|
|
63
|
+
- "^~/\\.zsh_sessions/.*" # Shell history
|
|
64
|
+
- "^~/\\.bash_sessions/.*" # Shell history
|
|
65
|
+
|
|
66
|
+
# Always skip - sensitive data
|
|
67
|
+
# Credential files and key stores that should never be indexed for security.
|
|
68
|
+
# Matched files are skipped during scanning (never enter the database).
|
|
69
|
+
sensitive:
|
|
70
|
+
- ".*/Library/Keychains/.*" # macOS keychains
|
|
71
|
+
- ".*/\\.aws/.*" # AWS credentials
|
|
72
|
+
- ".*/\\.ssh/.*" # SSH keys
|
|
73
|
+
- ".*/\\.kube/.*" # Kubernetes config
|
|
74
|
+
|
|
75
|
+
# Indexing configuration - INDEX ALL FILE TYPES
|
|
76
|
+
indexing:
|
|
77
|
+
supported_extensions: [] # Empty = index ALL file types
|
|
78
|
+
max_file_size_mb: 0 # 0 = no size limit (index everything)
|
|
79
|
+
lookback_days: 14 # Browser history window (days back to index)
|
|
80
|
+
content_snippets: false # Extract file/email content previews for keyword search
|
|
81
|
+
|
|
82
|
+
# Semantic search — stores content as embeddings in a local ChromaDB database
|
|
83
|
+
# Enables finding files and chats by meaning, not just keywords
|
|
84
|
+
# Trade-off: additional disk space and longer indexing time
|
|
85
|
+
semantic:
|
|
86
|
+
file_vectorization: false
|
|
87
|
+
chat_vectorization: false
|
|
88
|
+
|
|
89
|
+
# Vectorization — controls what gets embedded for semantic search
|
|
90
|
+
# Requires semantic.file_vectorization: true to take effect for files
|
|
91
|
+
vectorization:
|
|
92
|
+
# File extensions to embed (omit = all types; empty list = disable vectorization)
|
|
93
|
+
# Default: prose documents that benefit from meaning-based search.
|
|
94
|
+
# Code files (.py, .js, etc.) are searchable via FTS — vectorization adds
|
|
95
|
+
# little value for structured code but significant noise to the vector space.
|
|
96
|
+
# NOTE: file_types filters among types the extractor can read (text, code,
|
|
97
|
+
# .pdf, .docx, .csv). Adding unsupported types (.xlsx, .pptx, etc.) has no
|
|
98
|
+
# effect — files pass the allowlist but produce no embeddings.
|
|
99
|
+
file_types:
|
|
100
|
+
- .md
|
|
101
|
+
- .txt
|
|
102
|
+
- .pdf
|
|
103
|
+
- .docx
|
|
104
|
+
# Chunk size in characters — tuned for MiniLM-L6-v2 (256-token input window).
|
|
105
|
+
# ~1000 chars ≈ 250 tokens. Larger chunks get silently truncated by the model,
|
|
106
|
+
# meaning content past the window is invisible to semantic search.
|
|
107
|
+
chunk_size: 1000
|
|
108
|
+
# Overlap between consecutive chunks (fraction of chunk_size, 0.0 to 1.0).
|
|
109
|
+
# Applies to file vectorization only. Chat message chunking uses a fixed
|
|
110
|
+
# character overlap defined in footprinter/semantic/chunking.py.
|
|
111
|
+
chunk_overlap: 0.15
|
|
112
|
+
# Patterns (fnmatch syntax) for file paths to skip during vectorization.
|
|
113
|
+
# Matched against the full absolute path; a leading ~ (e.g. ~/Work/file.json) is expanded at runtime.
|
|
114
|
+
# Files matching these patterns are still indexed (searchable via FTS)
|
|
115
|
+
# but not embedded in the vector store.
|
|
116
|
+
#
|
|
117
|
+
# Common exclusions — system caches, IDE output, and tool output that
|
|
118
|
+
# match text file extensions but contain no meaningful prose content:
|
|
119
|
+
# - "**/Photos Library.photoslibrary/**" # macOS Spotlight index cache (.txt)
|
|
120
|
+
# - "**/.claude/debug/**" # Claude Code debug logs
|
|
121
|
+
# - "**/.claude/paste-cache/**" # Claude Code paste cache
|
|
122
|
+
# - "**/.claude/cache/**" # Claude Code cache
|
|
123
|
+
# - "**/.claude/projects/**" # Claude Code session data
|
|
124
|
+
# - "**/.claude/plans/**" # Claude Code auto-generated plans
|
|
125
|
+
# - "**/.claude/plugins/**" # Claude Code plugin cache
|
|
126
|
+
# - "**/.cci/**" # CumulusCI cache
|
|
127
|
+
# - "**/.context/**" # IDE context directories
|
|
128
|
+
exclude_patterns: []
|
|
129
|
+
|
|
130
|
+
# Source registry seeds — loaded into the sources table on init
|
|
131
|
+
# Connector sources added by: fp connect install <name>
|
|
132
|
+
source_seeds:
|
|
133
|
+
- name: local
|
|
134
|
+
source_type: file
|
|
135
|
+
account: null
|
|
136
|
+
label: "Local Files"
|
|
137
|
+
icon: folder
|
|
138
|
+
- name: browser
|
|
139
|
+
source_type: browser
|
|
140
|
+
account: null
|
|
141
|
+
label: "Browser History"
|
|
142
|
+
icon: globe
|
|
143
|
+
- name: email
|
|
144
|
+
source_type: email
|
|
145
|
+
account: null
|
|
146
|
+
label: "Email"
|
|
147
|
+
icon: envelope
|
|
148
|
+
- name: chat
|
|
149
|
+
source_type: chat
|
|
150
|
+
account: null
|
|
151
|
+
label: "Chat"
|
|
152
|
+
icon: message
|
|
153
|
+
|
|
154
|
+
# Display labels for the group/project hierarchy
|
|
155
|
+
# Customize these to match your organization's terminology
|
|
156
|
+
domain:
|
|
157
|
+
labels:
|
|
158
|
+
group_singular: "Client"
|
|
159
|
+
group_plural: "Clients"
|
|
160
|
+
project_singular: "Project"
|
|
161
|
+
project_plural: "Projects"
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Context and identity path patterns
|
|
2
|
+
|
|
3
|
+
# Primary context detection by path
|
|
4
|
+
# Keys must match Context enum names
|
|
5
|
+
context_path_patterns:
|
|
6
|
+
CLIENT:
|
|
7
|
+
- '/Work/clients/'
|
|
8
|
+
INTERNAL: [] # Populated from config folder_classifications.internal_data
|
|
9
|
+
PERSONAL:
|
|
10
|
+
- '/Personal/'
|
|
11
|
+
CAREER:
|
|
12
|
+
- '/Work/admin/'
|
|
13
|
+
- '/Work/archive/'
|
|
14
|
+
|
|
15
|
+
# Path patterns for identity document detection (matched against lowercased paths)
|
|
16
|
+
identity_path_patterns:
|
|
17
|
+
- '/personal/identity/'
|
|
18
|
+
- '/identity/'
|