footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,250 @@
1
+ """Entity read endpoints for Footprinter HTTP API."""
2
+
3
+ from typing import Optional
4
+
5
+ from fastapi import APIRouter, Depends, HTTPException, Query
6
+
7
+ from footprinter.api.db import get_conn
8
+ from footprinter.services import (
9
+ chat_service,
10
+ client_service,
11
+ email_service,
12
+ file_service,
13
+ folder_service,
14
+ project_service,
15
+ visit_service,
16
+ )
17
+ from footprinter.services.roles import Role
18
+
19
+ router = APIRouter(tags=["entities"])
20
+
21
+
22
def _or_404(result, entity_type: str, entity_id: int):
    """Pass ``result`` through unchanged, or raise an HTTP 404 when it is ``None``.

    Only ``None`` counts as "missing"; any other value (including falsy ones)
    is returned as-is. The 404 detail reads ``"<entity_type> <entity_id> not found"``.
    """
    if result is not None:
        return result
    raise HTTPException(status_code=404, detail=f"{entity_type} {entity_id} not found")
27
+
28
+
29
+ # --- Files ---
30
+
31
+
32
@router.get("/files")
def list_files(
    conn=Depends(get_conn),
    project_id: Optional[int] = None,
    source: Optional[str] = Query(None, description="Comma-separated source filter"),
    status: Optional[str] = Query(None, description="Comma-separated status filter"),
    content_type: Optional[str] = None,
    limit: int = 50,
    page: int = 1,
):
    # List file records through file_service.list_ with the ADMIN role,
    # forwarding the optional project/content-type filters and limit/page.
    #
    # ``source`` and ``status`` arrive as comma-separated strings. Blank tokens
    # (from "a,,b", a trailing comma, or a whitespace-only value) are dropped so
    # an empty string is never forwarded as a filter value; when nothing
    # remains the filter is omitted (None), same as when the parameter is absent.
    source_list = [tok.strip() for tok in (source or "").split(",") if tok.strip()] or None
    status_list = [tok.strip() for tok in (status or "").split(",") if tok.strip()] or None
    return file_service.list_(
        conn,
        role=Role.ADMIN,
        project_id=project_id,
        source=source_list,
        status=status_list,
        content_type=content_type,
        limit=limit,
        page=page,
    )
54
+
55
+
56
@router.get("/files/{file_id}")
def get_file(file_id: int, conn=Depends(get_conn)):
    # Look the file up with the ADMIN role; a None result becomes an HTTP 404.
    record = file_service.get(conn, file_id, role=Role.ADMIN)
    return _or_404(record, "file", file_id)
59
+
60
+
61
+ # --- Emails ---
62
+
63
+
64
@router.get("/emails")
def list_emails(
    conn=Depends(get_conn),
    account: Optional[str] = None,
    client_id: Optional[int] = None,
    project_id: Optional[int] = None,
    query: Optional[str] = None,
    has_attachments: Optional[bool] = None,
    sort_by: str = "received_at",
    order: str = "desc",
    limit: int = 50,
    page: int = 1,
):
    # Every query parameter is forwarded untouched to email_service.list_,
    # which is always invoked with the ADMIN role.
    forwarded = {
        "account": account,
        "client_id": client_id,
        "project_id": project_id,
        "query": query,
        "has_attachments": has_attachments,
        "sort_by": sort_by,
        "order": order,
        "limit": limit,
        "page": page,
    }
    return email_service.list_(conn, role=Role.ADMIN, **forwarded)
90
+
91
+
92
@router.get("/emails/{email_id}")
def get_email(email_id: int, conn=Depends(get_conn)):
    # Look the email up with the ADMIN role; a None result becomes an HTTP 404.
    record = email_service.get(conn, email_id, role=Role.ADMIN)
    return _or_404(record, "email", email_id)
95
+
96
+
97
+ # --- Chats ---
98
+
99
+
100
@router.get("/chats")
def list_chats(
    conn=Depends(get_conn),
    account: Optional[str] = None,
    query: Optional[str] = None,
    sort_by: str = "modified_at",
    order: str = "desc",
    status: Optional[str] = Query(None, description="Comma-separated status filter"),
    limit: int = 50,
    page: int = 1,
):
    # List chat records through chat_service.list_ with the ADMIN role,
    # forwarding the account/query filters, sort options and limit/page.
    #
    # ``status`` arrives as a comma-separated string. Blank tokens (from
    # "a,,b", a trailing comma, or a whitespace-only value) are dropped so an
    # empty string is never forwarded as a status filter; when nothing remains
    # the filter is omitted (None), same as when the parameter is absent.
    status_list = [tok.strip() for tok in (status or "").split(",") if tok.strip()] or None
    return chat_service.list_(
        conn,
        role=Role.ADMIN,
        account=account,
        query=query,
        sort_by=sort_by,
        order=order,
        status=status_list,
        limit=limit,
        page=page,
    )
123
+
124
+
125
@router.get("/chats/{chat_id}")
def get_chat(chat_id: int, conn=Depends(get_conn)):
    # Look the chat up with the ADMIN role; a None result becomes an HTTP 404.
    record = chat_service.get(conn, chat_id, role=Role.ADMIN)
    return _or_404(record, "chat", chat_id)
128
+
129
+
130
+ # --- Projects ---
131
+
132
+
133
@router.get("/projects")
def list_projects(
    conn=Depends(get_conn),
    include: Optional[str] = Query(None, description="Comma-separated includes"),
    status: Optional[str] = Query(None, description="Comma-separated status filter"),
    client: Optional[str] = None,
    project_type: Optional[str] = None,
    limit: int = 50,
    page: int = 1,
):
    # List projects through project_service.list_ with the ADMIN role,
    # forwarding the client/project-type filters and limit/page.
    #
    # ``include`` and ``status`` arrive as comma-separated strings. Blank
    # tokens (from "a,,b", a trailing comma, or a whitespace-only value) are
    # dropped so an empty string is never forwarded to the service; when
    # nothing remains the argument is None, same as when the parameter is absent.
    include_list = [tok.strip() for tok in (include or "").split(",") if tok.strip()] or None
    status_list = [tok.strip() for tok in (status or "").split(",") if tok.strip()] or None
    return project_service.list_(
        conn,
        role=Role.ADMIN,
        include=include_list,
        status=status_list,
        client=client,
        project_type=project_type,
        limit=limit,
        page=page,
    )
155
+
156
+
157
@router.get("/projects/{project_id}")
def get_project(project_id: int, conn=Depends(get_conn), include: Optional[str] = None):
    # Fetch one project with the ADMIN role; a None result becomes an HTTP 404.
    #
    # ``include`` is a comma-separated string. Blank tokens (from "a,,b", a
    # trailing comma, or a whitespace-only value) are dropped so an empty
    # include name is never forwarded; when nothing remains the argument is
    # None, same as when the parameter is absent.
    include_list = [tok.strip() for tok in (include or "").split(",") if tok.strip()] or None
    project = project_service.get(conn, project_id, role=Role.ADMIN, include=include_list)
    return _or_404(project, "project", project_id)
165
+
166
+
167
+ # --- Clients ---
168
+
169
+
170
@router.get("/clients")
def list_clients(
    conn=Depends(get_conn),
    include: Optional[str] = Query(None, description="Comma-separated includes"),
    status: Optional[str] = Query(None, description="Comma-separated status filter"),
    limit: int = 50,
    page: int = 1,
):
    # List clients through client_service.list_ with the ADMIN role,
    # forwarding limit/page.
    #
    # ``include`` and ``status`` arrive as comma-separated strings. Blank
    # tokens (from "a,,b", a trailing comma, or a whitespace-only value) are
    # dropped so an empty string is never forwarded to the service; when
    # nothing remains the argument is None, same as when the parameter is absent.
    include_list = [tok.strip() for tok in (include or "").split(",") if tok.strip()] or None
    status_list = [tok.strip() for tok in (status or "").split(",") if tok.strip()] or None
    return client_service.list_(
        conn,
        role=Role.ADMIN,
        include=include_list,
        status=status_list,
        limit=limit,
        page=page,
    )
188
+
189
+
190
@router.get("/clients/{client_id}")
def get_client(client_id: int, conn=Depends(get_conn), include: Optional[str] = None):
    # Fetch one client with the ADMIN role; a None result becomes an HTTP 404.
    #
    # ``include`` is a comma-separated string. Blank tokens (from "a,,b", a
    # trailing comma, or a whitespace-only value) are dropped so an empty
    # include name is never forwarded; when nothing remains the argument is
    # None, same as when the parameter is absent.
    include_list = [tok.strip() for tok in (include or "").split(",") if tok.strip()] or None
    client = client_service.get(conn, client_id, role=Role.ADMIN, include=include_list)
    return _or_404(client, "client", client_id)
198
+
199
+
200
+ # --- Folders ---
201
+ # NOTE: /folders/by-path MUST be defined before /folders/{folder_id}
202
+ # to avoid FastAPI treating "by-path" as an int parameter.
203
+
204
+
205
@router.get("/folders/by-path")
def get_folder_by_path(path: str, conn=Depends(get_conn)):
    # Resolve a folder from its path (ADMIN role). _or_404 is not used here
    # because the 404 message quotes the path rather than a numeric id.
    folder = folder_service.get_by_path(conn, path, role=Role.ADMIN)
    if folder is not None:
        return folder
    raise HTTPException(status_code=404, detail=f"folder at path '{path}' not found")
211
+
212
+
213
@router.get("/folders")
def list_folders(
    conn=Depends(get_conn),
    project_id: Optional[int] = None,
    depth: Optional[int] = 1,
    include_hidden: bool = False,
    sort_by: str = "size",
    limit: int = 50,
    page: int = 1,
):
    # Every query parameter is forwarded untouched to folder_service.list_,
    # which is always invoked with the ADMIN role.
    forwarded = {
        "project_id": project_id,
        "depth": depth,
        "include_hidden": include_hidden,
        "sort_by": sort_by,
        "limit": limit,
        "page": page,
    }
    return folder_service.list_(conn, role=Role.ADMIN, **forwarded)
233
+
234
+
235
@router.get("/folders/{folder_id}")
def get_folder(folder_id: int, conn=Depends(get_conn)):
    # Look the folder up with the ADMIN role; a None result becomes an HTTP 404.
    record = folder_service.get(conn, folder_id, role=Role.ADMIN)
    return _or_404(record, "folder", folder_id)
238
+
239
+
240
+ # --- Visits ---
241
+
242
+
243
@router.get("/visits")
def list_visits(conn=Depends(get_conn), limit: int = 50, page: int = 1):
    # Forward limit/page to visit_service.list_, always with the ADMIN role.
    visits = visit_service.list_(conn, role=Role.ADMIN, limit=limit, page=page)
    return visits
246
+
247
+
248
@router.get("/visits/{entry_id}")
def get_visit(entry_id: int, conn=Depends(get_conn)):
    # Look the visit up with the ADMIN role; a None result becomes an HTTP 404.
    record = visit_service.get(conn, entry_id, role=Role.ADMIN)
    return _or_404(record, "visit", entry_id)
@@ -0,0 +1,47 @@
1
+ """Search endpoint for Footprinter HTTP API."""
2
+
3
+ from typing import Optional
4
+
5
+ from fastapi import APIRouter, Depends, Query
6
+
7
+ from footprinter.api.db import get_conn
8
+ from footprinter.services import search_service
9
+ from footprinter.services.roles import Role
10
+
11
+ router = APIRouter(tags=["search"])
12
+
13
+
14
@router.get("/search")
def search(
    conn=Depends(get_conn),
    query: str = "",
    sources: Optional[str] = Query(None, description="Comma-separated source filter"),
    project: Optional[str] = None,
    client: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    limit: int = 50,
    account: Optional[str] = None,
    sender: Optional[str] = None,
    days_back: Optional[int] = None,
    folder: Optional[str] = None,
    mime_type: Optional[str] = None,
):
    """Keyword search across indexed content."""
    # ``sources`` arrives as a comma-separated string. Blank tokens (from
    # "a,,b", a trailing comma, or a whitespace-only value) are dropped so an
    # empty string is never forwarded as a source name; when nothing remains
    # the filter is omitted (None), same as when the parameter is absent.
    source_list = [tok.strip() for tok in (sources or "").split(",") if tok.strip()] or None
    # All other parameters are passed through unchanged; the service is always
    # called with the ADMIN role.
    return search_service.search(
        conn,
        role=Role.ADMIN,
        query=query,
        sources=source_list,
        project=project,
        client=client,
        date_from=date_from,
        date_to=date_to,
        limit=limit,
        account=account,
        sender=sender,
        days_back=days_back,
        folder=folder,
        mime_type=mime_type,
    )
@@ -0,0 +1,33 @@
1
+ """Semantic search endpoint for Footprinter HTTP API."""
2
+
3
+ from fastapi import APIRouter, Depends, HTTPException, Query
4
+
5
+ from footprinter.api.db import get_conn
6
+ from footprinter.services import semantic_service
7
+ from footprinter.services.roles import Role
8
+
9
+ router = APIRouter(tags=["semantic"])
10
+
11
+ _VALID_SOURCES = {"chats", "files", "all"}
12
+
13
+
14
@router.get("/semantic")
def semantic_search(
    conn=Depends(get_conn),
    query: str = Query(..., min_length=3, description="Search query (minimum 3 characters)"),
    source: str = Query("all", description="Source to search: chats, files, or all"),
    limit: int = 10,
):
    """Semantic (vector) search across indexed content."""
    # Happy path first: a recognised source goes straight to the service,
    # which is always called with the ADMIN role.
    if source in _VALID_SOURCES:
        return semantic_service.semantic_search(
            conn,
            query,
            role=Role.ADMIN,
            source=source,
            limit=limit,
        )

    # Unknown source: answer 422 and list the accepted values alphabetically.
    allowed = ", ".join(sorted(_VALID_SOURCES))
    raise HTTPException(
        status_code=422,
        detail=f"Invalid source '{source}'. Must be one of: {allowed}",
    )
@@ -0,0 +1,66 @@
1
+ """Footprinter HTTP API — FastAPI app factory and server entry point."""
2
+
3
+ from fastapi import FastAPI, Request
4
+ from fastapi.responses import JSONResponse
5
+
6
+ from footprinter.api.db import DatabaseNotInitializedError
7
+
8
+
9
def create_app() -> "FastAPI":
    """Create and configure the FastAPI application.

    Registers a ``/health`` route, a 503 handler for
    ``DatabaseNotInitializedError``, and mounts the status, search and entity
    routers under ``/api``. The semantic router is mounted only when
    ``footprinter.api.semantic`` can be imported; if that import fails the
    reason is logged and the app is returned without the semantic routes.

    Returns:
        The configured ``FastAPI`` instance.
    """
    import logging

    # Routers are imported here rather than at module import time, as in the
    # original layout of this factory.
    from footprinter import __version__
    from footprinter.api.entities import router as entities_router
    from footprinter.api.search import router as search_router
    from footprinter.api.status import router as status_router

    app = FastAPI(
        title="Footprinter API",
        version=__version__,
        description="HTTP API for Footprinter — file archival and AI context system.",
    )

    # Health check (outside /api prefix)
    @app.get("/health")
    def health():
        return {"status": "ok"}

    # Exception handler for uninitialized DB
    @app.exception_handler(DatabaseNotInitializedError)
    async def db_not_initialized_handler(request: Request, exc: DatabaseNotInitializedError):
        return JSONResponse(
            status_code=503,
            content={
                "detail": "Database not initialized. Run 'fp ingest' to populate.",
            },
        )

    # Mount routers
    app.include_router(status_router, prefix="/api")
    app.include_router(search_router, prefix="/api")
    app.include_router(entities_router, prefix="/api")

    # Conditional semantic router. Only the import is guarded, so an error
    # raised while mounting is not mistaken for a missing optional dependency.
    try:
        from footprinter.api.semantic import router as semantic_router
    except ImportError as exc:
        # Still best-effort: the API works without semantic search, but record
        # why the route is absent instead of discarding the error silently.
        logging.getLogger(__name__).info("Semantic router not mounted: %s", exc)
    else:
        app.include_router(semantic_router, prefix="/api")

    return app
55
+
56
+
57
def main(host: str = "127.0.0.1", port: int = 8000) -> None:
    """Build the application with ``create_app`` and hand it to ``uvicorn.run``.

    Args:
        host: Interface to bind; defaults to the loopback address.
        port: TCP port to listen on.
    """
    # uvicorn is imported only when the server is actually started.
    from uvicorn import run as serve

    serve(create_app(), host=host, port=port)
63
+
64
+
65
+ if __name__ == "__main__":
66
+ main()
@@ -0,0 +1,15 @@
1
+ """Status endpoint for Footprinter HTTP API."""
2
+
3
+ from fastapi import APIRouter, Depends
4
+
5
+ from footprinter.api.db import get_conn
6
+ from footprinter.services import status_service
7
+ from footprinter.services.roles import Role
8
+
9
+ router = APIRouter(tags=["status"])
10
+
11
+
12
@router.get("/status")
def get_status(conn=Depends(get_conn)):
    """Return system status and data counts."""
    # Delegates entirely to the status service, always with the ADMIN role.
    status_payload = status_service.get_status(conn, role=Role.ADMIN)
    return status_payload
File without changes
@@ -0,0 +1,161 @@
1
+ # Footprinter Configuration (example)
2
+ # Copy to config.yaml and fill in your values.
3
+
4
+ # Directories to scan - only Work, Personal, and .claude
5
+ directories:
6
+ - "~/Work"
7
+ - "~/Personal"
8
+ - "~/.claude"
9
+
10
+ # Browser history sources
11
+ browsers:
12
+ - safari
13
+ - chrome
14
+
15
+ # Exclusion patterns (regex)
16
+ # Patterns starting with ^~/ expand ~ to your home directory at runtime.
17
+ #
18
+ # Files matching these patterns are NEVER scanned — they don't appear in the
19
+ # database at all. This is different from hidden files (dot-files/dot-directories),
20
+ # which ARE scanned and indexed with status='hidden' so they appear in the catalog
21
+ # but are excluded from Drive sync.
22
+ #
23
+ # Two tiers, both applied to every folder: `always` (regeneratable dependencies and system noise) and `sensitive` (credential files and key stores)
24
+ exclusions:
25
+ # Always skip - applies to all folders
26
+ # Files matching these patterns are skipped during scanning and never enter the database.
27
+ # Use for regeneratable dependencies and system noise that has no archival value.
28
+ always:
29
+ # Regeneratable dependencies
30
+ - ".*/node_modules/.*" # npm dependencies
31
+ - ".*/__pycache__/.*" # Python bytecode
32
+ - ".*/venv/.*" # Python virtualenvs (non-hidden)
33
+ - ".*/\\.venv/.*" # Python virtualenvs (hidden)
34
+ - ".*/site-packages/.*" # Python packages
35
+ - ".*\\.pyc$" # Python compiled files
36
+ - ".*/\\.vscode/extensions/.*" # VS Code extensions (reinstallable)
37
+ - ".*/\\.npm/.*" # npm cache
38
+ - ".*/\\.nvm/.*" # Node version manager
39
+ - ".*/\\.cache/.*" # Generic caches
40
+ - ".*/\\.sf/.*" # Salesforce CLI
41
+ - ".*/\\.sfdx/.*" # Salesforce DX CLI
42
+ - ".*/\\.docker/.*" # Docker config
43
+ - ".*/\\.cumulusci/.*" # CumulusCI cache
44
+ # Home-level Claude dirs only (keep .claude within Work/Personal)
45
+ - "^~/\\.claude/.*" # Home-level .claude
46
+ - "^~/\\.claude-worktrees/.*" # Home-level .claude-worktrees
47
+ # Git internals (but NOT .gitignore, .gitattributes - those are kept)
48
+ - ".*/\\.git/.*" # Git internal directory contents
49
+ # System noise
50
+ - ".*\\.tmp$" # Temp files
51
+ - ".*/~\\$.*" # Office lock files (~$document.docx)
52
+ - ".*/\\.Trash/.*" # macOS Trash
53
+ - ".*/Library/.*" # macOS system directory
54
+ - ".*\\.DS_Store$" # macOS Finder metadata
55
+ # User directories not for archival
56
+ - "^~/Downloads/.*" # Transient downloads
57
+ - "^~/Music/Audio Music Apps/.*" # App databases
58
+ - "^~/Applications/.*" # User-installed apps
59
+ # Hidden app data (not in Work/Personal)
60
+ - "^~/\\.local/.*" # Local app data
61
+ - "^~/\\.cursor/.*" # Cursor IDE
62
+ - "^~/\\.codex/.*" # Codex CLI
63
+ - "^~/\\.zsh_sessions/.*" # Shell history
64
+ - "^~/\\.bash_sessions/.*" # Shell history
65
+
66
+ # Always skip - sensitive data
67
+ # Credential files and key stores that should never be indexed for security.
68
+ # Matched files are skipped during scanning (never enter the database).
69
+ sensitive:
70
+ - ".*/Library/Keychains/.*" # macOS keychains
71
+ - ".*/\\.aws/.*" # AWS credentials
72
+ - ".*/\\.ssh/.*" # SSH keys
73
+ - ".*/\\.kube/.*" # Kubernetes config
74
+
75
+ # Indexing configuration - INDEX ALL FILE TYPES
76
+ indexing:
77
+ supported_extensions: [] # Empty = index ALL file types
78
+ max_file_size_mb: 0 # 0 = no size limit (index everything)
79
+ lookback_days: 14 # Browser history window (days back to index)
80
+ content_snippets: false # Extract file/email content previews for keyword search
81
+
82
+ # Semantic search — stores content as embeddings in a local ChromaDB database
83
+ # Enables finding files and chats by meaning, not just keywords
84
+ # Trade-off: additional disk space and longer indexing time
85
+ semantic:
86
+ file_vectorization: false
87
+ chat_vectorization: false
88
+
89
+ # Vectorization — controls what gets embedded for semantic search
90
+ # Requires semantic.file_vectorization: true to take effect for files
91
+ vectorization:
92
+ # File extensions to embed (omit = all types; empty list = disable vectorization)
93
+ # Default: prose documents that benefit from meaning-based search.
94
+ # Code files (.py, .js, etc.) are searchable via FTS — vectorization adds
95
+ # little value for structured code but significant noise to the vector space.
96
+ # NOTE: file_types filters among types the extractor can read (text, code,
97
+ # .pdf, .docx, .csv). Adding unsupported types (.xlsx, .pptx, etc.) has no
98
+ # effect — files pass the allowlist but produce no embeddings.
99
+ file_types:
100
+ - .md
101
+ - .txt
102
+ - .pdf
103
+ - .docx
104
+ # Chunk size in characters — tuned for MiniLM-L6-v2 (256-token input window).
105
+ # ~1000 chars ≈ 250 tokens. Larger chunks get silently truncated by the model,
106
+ # meaning content past the window is invisible to semantic search.
107
+ chunk_size: 1000
108
+ # Overlap between consecutive chunks (fraction of chunk_size, 0.0 to 1.0).
109
+ # Applies to file vectorization only. Chat message chunking uses a fixed
110
+ # character overlap defined in footprinter/semantic/chunking.py.
111
+ chunk_overlap: 0.15
112
+ # Patterns (fnmatch syntax) for file paths to skip during vectorization.
113
+ # Matched against the full absolute path; a leading ~ is expanded at runtime (e.g. ~/Work/file.json).
114
+ # Files matching these patterns are still indexed (searchable via FTS)
115
+ # but not embedded in the vector store.
116
+ #
117
+ # Common exclusions — system caches, IDE output, and tool output that
118
+ # match text file extensions but contain no meaningful prose content:
119
+ # - "**/Photos Library.photoslibrary/**" # macOS Spotlight index cache (.txt)
120
+ # - "**/.claude/debug/**" # Claude Code debug logs
121
+ # - "**/.claude/paste-cache/**" # Claude Code paste cache
122
+ # - "**/.claude/cache/**" # Claude Code cache
123
+ # - "**/.claude/projects/**" # Claude Code session data
124
+ # - "**/.claude/plans/**" # Claude Code auto-generated plans
125
+ # - "**/.claude/plugins/**" # Claude Code plugin cache
126
+ # - "**/.cci/**" # CumulusCI cache
127
+ # - "**/.context/**" # IDE context directories
128
+ exclude_patterns: []
129
+
130
+ # Source registry seeds — loaded into the sources table on init
131
+ # Connector sources added by: fp connect install <name>
132
+ source_seeds:
133
+ - name: local
134
+ source_type: file
135
+ account: null
136
+ label: "Local Files"
137
+ icon: folder
138
+ - name: browser
139
+ source_type: browser
140
+ account: null
141
+ label: "Browser History"
142
+ icon: globe
143
+ - name: email
144
+ source_type: email
145
+ account: null
146
+ label: "Email"
147
+ icon: envelope
148
+ - name: chat
149
+ source_type: chat
150
+ account: null
151
+ label: "Chat"
152
+ icon: message
153
+
154
+ # Display labels for the group/project hierarchy
155
+ # Customize these to match your organization's terminology
156
+ domain:
157
+ labels:
158
+ group_singular: "Client"
159
+ group_plural: "Clients"
160
+ project_singular: "Project"
161
+ project_plural: "Projects"
@@ -0,0 +1,18 @@
1
+ # Context and identity path patterns
2
+
3
+ # Primary context detection by path
4
+ # Keys must match Context enum names
5
+ context_path_patterns:
6
+ CLIENT:
7
+ - '/Work/clients/'
8
+ INTERNAL: [] # Populated from config folder_classifications.internal_data
9
+ PERSONAL:
10
+ - '/Personal/'
11
+ CAREER:
12
+ - '/Work/admin/'
13
+ - '/Work/archive/'
14
+
15
+ # Path patterns for identity document detection (matched against lowercased paths)
16
+ identity_path_patterns:
17
+ - '/personal/identity/'
18
+ - '/identity/'