footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
# Extension-to-ContentType mappings for v2 classification
|
|
2
|
+
# Each key is a content-type name that maps to a list of file extensions (with leading dots)
|
|
3
|
+
|
|
4
|
+
code:
|
|
5
|
+
# Standard programming languages
|
|
6
|
+
- '.py'
|
|
7
|
+
- '.js'
|
|
8
|
+
- '.ts'
|
|
9
|
+
- '.jsx'
|
|
10
|
+
- '.tsx'
|
|
11
|
+
- '.java'
|
|
12
|
+
- '.cls'
|
|
13
|
+
- '.trigger'
|
|
14
|
+
- '.apex'
|
|
15
|
+
- '.go'
|
|
16
|
+
- '.rs'
|
|
17
|
+
- '.rb'
|
|
18
|
+
- '.sh'
|
|
19
|
+
- '.bash'
|
|
20
|
+
- '.sql'
|
|
21
|
+
- '.c'
|
|
22
|
+
- '.cpp'
|
|
23
|
+
- '.h'
|
|
24
|
+
- '.hpp'
|
|
25
|
+
- '.swift'
|
|
26
|
+
- '.kt'
|
|
27
|
+
- '.scala'
|
|
28
|
+
- '.r'
|
|
29
|
+
- '.pl'
|
|
30
|
+
- '.php'
|
|
31
|
+
- '.lua'
|
|
32
|
+
- '.vim'
|
|
33
|
+
- '.el'
|
|
34
|
+
- '.zsh'
|
|
35
|
+
- '.ps1'
|
|
36
|
+
- '.psm1'
|
|
37
|
+
- '.css'
|
|
38
|
+
- '.scss'
|
|
39
|
+
- '.less'
|
|
40
|
+
- '.vue'
|
|
41
|
+
- '.svelte'
|
|
42
|
+
- '.ex'
|
|
43
|
+
- '.exs'
|
|
44
|
+
- '.erl'
|
|
45
|
+
- '.hs'
|
|
46
|
+
# JavaScript/TypeScript module variants
|
|
47
|
+
- '.mjs'
|
|
48
|
+
- '.cjs'
|
|
49
|
+
- '.mts'
|
|
50
|
+
- '.cts'
|
|
51
|
+
# Python type stubs and Cython
|
|
52
|
+
- '.pyi'
|
|
53
|
+
- '.pyx'
|
|
54
|
+
- '.pxd'
|
|
55
|
+
- '.pxi'
|
|
56
|
+
# Other languages common in packages
|
|
57
|
+
- '.f90'
|
|
58
|
+
- '.f'
|
|
59
|
+
- '.f95'
|
|
60
|
+
- '.for'
|
|
61
|
+
- '.cc'
|
|
62
|
+
- '.cxx'
|
|
63
|
+
- '.proto'
|
|
64
|
+
- '.typed'
|
|
65
|
+
- '.coffee'
|
|
66
|
+
- '.mdx'
|
|
67
|
+
- '.robot'
|
|
68
|
+
- '.wasm'
|
|
69
|
+
- '.doctest'
|
|
70
|
+
- '.fish'
|
|
71
|
+
- '.csh'
|
|
72
|
+
- '.wsdl'
|
|
73
|
+
# Build/project files
|
|
74
|
+
- '.dtd'
|
|
75
|
+
- '.sdl'
|
|
76
|
+
- '.pbxproj'
|
|
77
|
+
- '.xcscheme'
|
|
78
|
+
# Design files
|
|
79
|
+
- '.design'
|
|
80
|
+
# Test files
|
|
81
|
+
- '.test'
|
|
82
|
+
- '.spec'
|
|
83
|
+
# Init files
|
|
84
|
+
- '.init'
|
|
85
|
+
|
|
86
|
+
image:
|
|
87
|
+
- '.png'
|
|
88
|
+
- '.jpg'
|
|
89
|
+
- '.jpeg'
|
|
90
|
+
- '.gif'
|
|
91
|
+
- '.webp'
|
|
92
|
+
- '.svg'
|
|
93
|
+
- '.ico'
|
|
94
|
+
- '.heic'
|
|
95
|
+
- '.heif'
|
|
96
|
+
- '.bmp'
|
|
97
|
+
- '.tiff'
|
|
98
|
+
- '.tif'
|
|
99
|
+
- '.avif'
|
|
100
|
+
- '.raw'
|
|
101
|
+
- '.cr2'
|
|
102
|
+
- '.nef'
|
|
103
|
+
- '.dng'
|
|
104
|
+
- '.psd'
|
|
105
|
+
- '.ai'
|
|
106
|
+
- '.eps'
|
|
107
|
+
|
|
108
|
+
recording:
|
|
109
|
+
- '.mp4'
|
|
110
|
+
- '.mov'
|
|
111
|
+
- '.m4a'
|
|
112
|
+
- '.wav'
|
|
113
|
+
- '.mp3'
|
|
114
|
+
- '.webm'
|
|
115
|
+
- '.mkv'
|
|
116
|
+
- '.avi'
|
|
117
|
+
- '.m4v'
|
|
118
|
+
- '.flac'
|
|
119
|
+
- '.ogg'
|
|
120
|
+
- '.wmv'
|
|
121
|
+
- '.aac'
|
|
122
|
+
- '.wma'
|
|
123
|
+
- '.aiff'
|
|
124
|
+
- '.opus'
|
|
125
|
+
- '.zoom'
|
|
126
|
+
|
|
127
|
+
transient:
|
|
128
|
+
- '.pyc'
|
|
129
|
+
- '.pyo'
|
|
130
|
+
- '.class'
|
|
131
|
+
- '.o'
|
|
132
|
+
- '.obj'
|
|
133
|
+
- '.exe'
|
|
134
|
+
- '.dll'
|
|
135
|
+
- '.so'
|
|
136
|
+
- '.dylib'
|
|
137
|
+
- '.log'
|
|
138
|
+
- '.tmp'
|
|
139
|
+
- '.temp'
|
|
140
|
+
- '.cache'
|
|
141
|
+
- '.lock'
|
|
142
|
+
- '.bak'
|
|
143
|
+
- '.swp'
|
|
144
|
+
- '.swo'
|
|
145
|
+
- '.map'
|
|
146
|
+
- '.tsbuildinfo'
|
|
147
|
+
- '.pysave'
|
|
148
|
+
|
|
149
|
+
data:
|
|
150
|
+
- '.csv'
|
|
151
|
+
- '.xlsx'
|
|
152
|
+
- '.xls'
|
|
153
|
+
- '.parquet'
|
|
154
|
+
- '.tsv'
|
|
155
|
+
- '.sqlite'
|
|
156
|
+
- '.db'
|
|
157
|
+
- '.sql'
|
|
158
|
+
- '.mdb'
|
|
159
|
+
- '.accdb'
|
|
160
|
+
- '.dat'
|
|
161
|
+
- '.jsonl'
|
|
162
|
+
- '.ndjson'
|
|
163
|
+
# Font files (treated as data assets)
|
|
164
|
+
- '.woff'
|
|
165
|
+
- '.woff2'
|
|
166
|
+
- '.ttf'
|
|
167
|
+
- '.otf'
|
|
168
|
+
- '.eot'
|
|
169
|
+
# GIS shapefile components
|
|
170
|
+
- '.shp'
|
|
171
|
+
- '.shx'
|
|
172
|
+
- '.dbf'
|
|
173
|
+
- '.prj'
|
|
174
|
+
- '.sbn'
|
|
175
|
+
- '.sbx'
|
|
176
|
+
# Serialization formats
|
|
177
|
+
- '.pickle'
|
|
178
|
+
- '.pkl'
|
|
179
|
+
- '.jar'
|
|
180
|
+
- '.npy'
|
|
181
|
+
- '.npz'
|
|
182
|
+
# Playlist/media metadata
|
|
183
|
+
- '.m3u'
|
|
184
|
+
- '.m3u8'
|
|
185
|
+
- '.pls'
|
|
186
|
+
# Database/backup files
|
|
187
|
+
- '.sqlite3'
|
|
188
|
+
- '.bin'
|
|
189
|
+
# Package/installer files
|
|
190
|
+
- '.pkg'
|
|
191
|
+
- '.dmg'
|
|
192
|
+
- '.msi'
|
|
193
|
+
# Certificate files
|
|
194
|
+
- '.p12'
|
|
195
|
+
- '.crt'
|
|
196
|
+
- '.pem'
|
|
197
|
+
- '.cer'
|
|
198
|
+
- '.pfx'
|
|
199
|
+
- '.pkpass'
|
|
200
|
+
# Spreadsheet formats
|
|
201
|
+
- '.ods'
|
|
202
|
+
# Event/log files
|
|
203
|
+
- '.evt'
|
|
204
|
+
- '.evtx'
|
|
205
|
+
# Subtitle files
|
|
206
|
+
- '.srt'
|
|
207
|
+
- '.sub'
|
|
208
|
+
- '.vtt'
|
|
209
|
+
# Genealogy data
|
|
210
|
+
- '.ged'
|
|
211
|
+
|
|
212
|
+
config:
|
|
213
|
+
- '.yaml'
|
|
214
|
+
- '.yml'
|
|
215
|
+
- '.toml'
|
|
216
|
+
- '.ini'
|
|
217
|
+
- '.conf'
|
|
218
|
+
- '.cfg'
|
|
219
|
+
- '.env'
|
|
220
|
+
- '.properties'
|
|
221
|
+
# Environment/config variants
|
|
222
|
+
- '.example'
|
|
223
|
+
- '.sample'
|
|
224
|
+
- '.template'
|
|
225
|
+
- '.local'
|
|
226
|
+
# IDE/editor workspace configs
|
|
227
|
+
- '.code-workspace'
|
|
228
|
+
- '.xcworkspacedata'
|
|
229
|
+
# Apple property lists
|
|
230
|
+
- '.plist'
|
|
231
|
+
# macOS automation
|
|
232
|
+
- '.agent'
|
|
233
|
+
|
|
234
|
+
documentation:
|
|
235
|
+
- '.md'
|
|
236
|
+
- '.markdown'
|
|
237
|
+
- '.rst'
|
|
238
|
+
- '.adoc'
|
|
239
|
+
- '.asciidoc'
|
|
240
|
+
- '.tex'
|
|
241
|
+
- '.rtf'
|
|
242
|
+
|
|
243
|
+
administrative:
|
|
244
|
+
- '.docx'
|
|
245
|
+
- '.doc'
|
|
246
|
+
- '.pdf'
|
|
247
|
+
- '.odt'
|
|
248
|
+
- '.pages'
|
|
249
|
+
- '.pptx'
|
|
250
|
+
- '.ppt'
|
|
251
|
+
- '.odp'
|
|
252
|
+
- '.key'
|
|
253
|
+
- '.numbers'
|
|
254
|
+
- '.ics'
|
|
255
|
+
- '.eml'
|
|
256
|
+
- '.email'
|
|
257
|
+
- '.msg'
|
|
258
|
+
|
|
259
|
+
web:
|
|
260
|
+
- '.html'
|
|
261
|
+
- '.htm'
|
|
262
|
+
- '.xhtml'
|
|
263
|
+
- '.xml'
|
|
264
|
+
- '.xsl'
|
|
265
|
+
- '.xslt'
|
|
266
|
+
- '.svg'
|
|
267
|
+
- '.rss'
|
|
268
|
+
- '.atom'
|
|
269
|
+
|
|
270
|
+
archive:
|
|
271
|
+
- '.zip'
|
|
272
|
+
- '.tar'
|
|
273
|
+
- '.gz'
|
|
274
|
+
- '.bz2'
|
|
275
|
+
- '.7z'
|
|
276
|
+
- '.rar'
|
|
277
|
+
- '.tgz'
|
|
278
|
+
- '.xz'
|
|
279
|
+
- '.dmg'
|
|
280
|
+
- '.iso'
|
|
281
|
+
|
|
282
|
+
ambiguous:
|
|
283
|
+
- '.json'
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Filename and path patterns for content type detection
|
|
2
|
+
|
|
3
|
+
# TRANSIENT file patterns (safe to delete)
|
|
4
|
+
transient_patterns:
|
|
5
|
+
- '\.DS_Store$'
|
|
6
|
+
- 'Thumbs\.db$'
|
|
7
|
+
- '\.tmp$'
|
|
8
|
+
- '\.temp$'
|
|
9
|
+
- '~$'
|
|
10
|
+
- '\.swp$'
|
|
11
|
+
- '\.swo$'
|
|
12
|
+
- '\.log$'
|
|
13
|
+
- '\.cache'
|
|
14
|
+
- '__pycache__'
|
|
15
|
+
- 'node_modules'
|
|
16
|
+
- '\.npm'
|
|
17
|
+
- '\.yarn'
|
|
18
|
+
- 'venv'
|
|
19
|
+
- '\.venv'
|
|
20
|
+
- 'env/'
|
|
21
|
+
- '\.env/'
|
|
22
|
+
- '\.pyc$'
|
|
23
|
+
- '\.pyo$'
|
|
24
|
+
- '\.class$'
|
|
25
|
+
- '\.o$'
|
|
26
|
+
- '\.obj$'
|
|
27
|
+
- '\.exe$'
|
|
28
|
+
- 'dist/'
|
|
29
|
+
- 'build/'
|
|
30
|
+
- '\.next/'
|
|
31
|
+
- '\.git/objects'
|
|
32
|
+
- '\.git/lfs'
|
|
33
|
+
|
|
34
|
+
transient_path_patterns:
|
|
35
|
+
- '/Downloads/'
|
|
36
|
+
- '/Trash/'
|
|
37
|
+
- '/tmp/'
|
|
38
|
+
- '/temp/'
|
|
39
|
+
- '/cache/'
|
|
40
|
+
- '/\.cache/'
|
|
41
|
+
- '/Caches/'
|
|
42
|
+
|
|
43
|
+
# REFERENCE file patterns (third-party docs, tutorials)
|
|
44
|
+
reference_patterns:
|
|
45
|
+
- 'tutorial'
|
|
46
|
+
- 'manual'
|
|
47
|
+
- 'handbook'
|
|
48
|
+
- 'reference'
|
|
49
|
+
- 'documentation'
|
|
50
|
+
- 'getting[_-]?started'
|
|
51
|
+
- 'cheat[_-]?sheet'
|
|
52
|
+
- 'quick[_-]?start'
|
|
53
|
+
- 'user[_-]?guide'
|
|
54
|
+
- 'api[_-]?docs?'
|
|
55
|
+
|
|
56
|
+
reference_path_patterns:
|
|
57
|
+
- '/tutorials?/'
|
|
58
|
+
- '/docs?/'
|
|
59
|
+
- '/reference/'
|
|
60
|
+
- '/manuals?/'
|
|
61
|
+
- '/guides?/'
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# MIME type fragment -> ContentType mapping
|
|
2
|
+
# These appear in the database as partial MIME types
|
|
3
|
+
# Values must match ContentType enum names
|
|
4
|
+
|
|
5
|
+
# Audio/Video MIME fragments
|
|
6
|
+
x-wav: MEDIA_RECORDING
|
|
7
|
+
x-aiff: MEDIA_RECORDING
|
|
8
|
+
mp4a-lat: MEDIA_RECORDING
|
|
9
|
+
mpeg: MEDIA_RECORDING
|
|
10
|
+
mp2t: MEDIA_RECORDING
|
|
11
|
+
quicktim: MEDIA_RECORDING
|
|
12
|
+
x-m4a: MEDIA_RECORDING
|
|
13
|
+
audio: MEDIA_RECORDING
|
|
14
|
+
video: MEDIA_RECORDING
|
|
15
|
+
|
|
16
|
+
# Image MIME fragments
|
|
17
|
+
x-icon: MEDIA_IMAGE
|
|
18
|
+
x-png: MEDIA_IMAGE
|
|
19
|
+
image: MEDIA_IMAGE
|
|
20
|
+
|
|
21
|
+
# Code/text MIME fragments
|
|
22
|
+
x-python: CODE
|
|
23
|
+
javascri: CODE
|
|
24
|
+
x-sh: CODE
|
|
25
|
+
x-csh: CODE
|
|
26
|
+
x-csrc: CODE
|
|
27
|
+
x-c++sr: CODE
|
|
28
|
+
x-java: CODE
|
|
29
|
+
x-sql: CODE
|
|
30
|
+
x-tex: DOCUMENTATION
|
|
31
|
+
x-latex: DOCUMENTATION
|
|
32
|
+
html: CODE
|
|
33
|
+
xml: CODE
|
|
34
|
+
plain: DOCUMENTATION
|
|
35
|
+
rtf: DOCUMENTATION
|
|
36
|
+
markdown: DOCUMENTATION
|
|
37
|
+
calendar: ADMINISTRATIVE
|
|
38
|
+
|
|
39
|
+
# Data MIME fragments
|
|
40
|
+
octet-st: DATA
|
|
41
|
+
spreadsh: DATA
|
|
42
|
+
csv: DATA
|
|
43
|
+
tab-sepa: DATA
|
|
44
|
+
|
|
45
|
+
# Archive
|
|
46
|
+
zip: DATA
|
|
47
|
+
x-tar: DATA
|
|
48
|
+
x-gzip: DATA
|
|
49
|
+
|
|
50
|
+
# Document MIME fragments
|
|
51
|
+
pdf: ADMINISTRATIVE
|
|
52
|
+
msword: ADMINISTRATIVE
|
|
53
|
+
wordproc: ADMINISTRATIVE
|
|
54
|
+
presenta: ADMINISTRATIVE
|
|
55
|
+
document: ADMINISTRATIVE
|
|
56
|
+
vnd.ms-e: DATA
|
|
57
|
+
vnd.oasi: ADMINISTRATIVE
|
|
58
|
+
x-iwork-: ADMINISTRATIVE
|
|
59
|
+
vnd.yell: CODE
|
|
60
|
+
|
|
61
|
+
# Package, certificate, shell-script, XML-schema and subtitle MIME fragments
|
|
62
|
+
x-zip: DATA
|
|
63
|
+
x-pkcs12: DATA
|
|
64
|
+
x-x509-c: DATA
|
|
65
|
+
x-shells: CODE
|
|
66
|
+
wsdl+xml: CODE
|
|
67
|
+
xml-dtd: CODE
|
|
68
|
+
x-subrip: DATA
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Salesforce-specific classification rules
|
|
2
|
+
# This file is a self-contained pattern pack. When enabled via pattern_packs
|
|
3
|
+
# config, these extensions/patterns are merged into the classifier at init time.
|
|
4
|
+
|
|
5
|
+
# Extensions representing portfolio work (KEEP)
|
|
6
|
+
keep_extensions:
|
|
7
|
+
- '.cls'
|
|
8
|
+
- '.trigger'
|
|
9
|
+
- '.cmp'
|
|
10
|
+
- '.component'
|
|
11
|
+
- '.page'
|
|
12
|
+
- '.flow'
|
|
13
|
+
- '.workflow'
|
|
14
|
+
- '.object'
|
|
15
|
+
- '.layout'
|
|
16
|
+
- '.permissionset'
|
|
17
|
+
- '.profile'
|
|
18
|
+
- '.app'
|
|
19
|
+
- '.tab'
|
|
20
|
+
|
|
21
|
+
# Salesforce-specific file extensions merged into core extension sets when pack is enabled
|
|
22
|
+
extensions:
|
|
23
|
+
code:
|
|
24
|
+
- '.cmp'
|
|
25
|
+
- '.component'
|
|
26
|
+
- '.page'
|
|
27
|
+
- '.flow'
|
|
28
|
+
- '.workflow'
|
|
29
|
+
- '.object'
|
|
30
|
+
- '.layout'
|
|
31
|
+
- '.permissionset'
|
|
32
|
+
- '.profile'
|
|
33
|
+
- '.app'
|
|
34
|
+
- '.tab'
|
|
35
|
+
- '.quickaction'
|
|
36
|
+
- '.soql'
|
|
37
|
+
- '.tag'
|
|
38
|
+
- '.flexipage'
|
|
39
|
+
- '.report'
|
|
40
|
+
- '.objecttranslation'
|
|
41
|
+
- '.flowdefinition'
|
|
42
|
+
- '.globalvalueset'
|
|
43
|
+
- '.auradoc'
|
|
44
|
+
- '.topicsforobjects'
|
|
45
|
+
data:
|
|
46
|
+
- '.resource'
|
|
47
|
+
|
|
48
|
+
# LWC/Aura paths (keep .js, .html, .css within these)
|
|
49
|
+
component_paths:
|
|
50
|
+
- 'lwc/'
|
|
51
|
+
- 'aura/'
|
|
52
|
+
- 'force-app/'
|
|
53
|
+
|
|
54
|
+
# Path markers that identify Salesforce project directories
|
|
55
|
+
path_markers:
|
|
56
|
+
- 'force-app'
|
|
57
|
+
- '.sfdx'
|
|
58
|
+
|
|
59
|
+
# LWC webruntime bundle file prefixes (no-extension files)
|
|
60
|
+
lwr_file_prefixes:
|
|
61
|
+
- 'lwr_loader'
|
|
62
|
+
- 'lwr_app'
|
|
63
|
+
- 'lwr_error_shim'
|
|
64
|
+
- 'meta-pixel-event-dispatcher'
|
|
65
|
+
- 'runtime'
|
|
66
|
+
|
|
67
|
+
# XML file path prefixes for Salesforce XML disambiguation
|
|
68
|
+
xml_paths:
|
|
69
|
+
- 'force-app/'
|
|
70
|
+
- 'salesforce/'
|
|
71
|
+
|
|
72
|
+
# Patterns for boilerplate/auto-generated files to EXCLUDE
|
|
73
|
+
exclude_patterns:
|
|
74
|
+
- '-meta\.xml$'
|
|
75
|
+
- '\.settings$'
|
|
76
|
+
- '\.labels$'
|
|
77
|
+
- '\.sharingrules$'
|
|
78
|
+
- '\.assignmentrules$'
|
|
79
|
+
- '\.autoresponserules$'
|
|
80
|
+
- '\.escalationrules$'
|
|
81
|
+
- '\.approvalprocess$'
|
|
82
|
+
- '\.remotesite$'
|
|
83
|
+
- '\.namedcredential$'
|
|
84
|
+
- '\.externalcredential$'
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# PII and credential detection patterns
|
|
2
|
+
# Used by RetentionClassifier for sensitivity analysis
|
|
3
|
+
|
|
4
|
+
pii_patterns:
|
|
5
|
+
# Identity numbers
|
|
6
|
+
ssn: '\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b'
|
|
7
|
+
ein: '\b\d{2}-\d{7}\b'
|
|
8
|
+
passport: '\b[A-Z]{1,2}\d{6,9}\b'
|
|
9
|
+
# Financial
|
|
10
|
+
credit_card: '\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12})\b'
|
|
11
|
+
bank_account: '\b(?:account|acct|routing)[\s#:]*\d{8,17}\b'
|
|
12
|
+
# Contact (contextual - only flag bulk lists, not single emails)
|
|
13
|
+
# The TLD class is letters only: inside [...] a '|' is a literal pipe, not alternation.
bulk_email_list: '(?:[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\s*[,;\n]){3,}'
|
|
14
|
+
phone: '\b(?:\+1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
|
|
15
|
+
# Address (full format only - street + city + state + zip)
|
|
16
|
+
address: '\b\d+\s+[\w\s]+(?:St(?:reet)?|Ave(?:nue)?|Rd|Road|Blvd|Dr(?:ive)?|Ln|Lane|Way|Ct|Court)[\s,]+[\w\s]+,?\s+[A-Z]{2}\s+\d{5}(?:-\d{4})?\b'
|
|
17
|
+
# Dates with context only
|
|
18
|
+
date_of_birth: '\b(?:DOB|Date of Birth|Birth(?:date)?|Born)[\s:]+\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b'
|
|
19
|
+
|
|
20
|
+
credential_patterns:
|
|
21
|
+
api_key: "(?:api[_-]?key|apikey)\\s*[=:]\\s*[\"']?[\\w-]{20,}"
|
|
22
|
+
aws_key: 'AKIA[0-9A-Z]{16}'
|
|
23
|
+
aws_secret: "(?:aws[_-]?secret|secret[_-]?key)\\s*[=:]\\s*[\"']?[\\w/+=]{40}"
|
|
24
|
+
stripe_key: 'sk_(?:live|test)_[0-9a-zA-Z]{24,}'
|
|
25
|
+
password: "(?:password|passwd|pwd)\\s*[=:]\\s*[\"'][^\"']{6,}[\"']"
|
|
26
|
+
bearer_token: 'Bearer\s+[A-Za-z0-9_-]{20,}'
|
|
27
|
+
# Matches unlabelled (PKCS#8), RSA, EC, DSA, OpenSSH and encrypted PKCS#8 PEM headers.
private_key: '-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----'
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Footprinter CLI router."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _check_python_version() -> None:
    """Abort with exit status 1 when the interpreter predates Python 3.11.

    On a supported interpreter the function returns without doing anything.
    Otherwise a one-line explanation naming the detected major.minor version
    is written to stderr before exiting.
    """
    if sys.version_info >= (3, 11):
        return

    major, minor = sys.version_info[0], sys.version_info[1]
    message = f"Error: Footprinter requires Python 3.11 or later (found {major}.{minor})."
    print(message, file=sys.stderr)
    sys.exit(1)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _is_first_run() -> bool:
    """Tell whether both the config file and the database are absent.

    Returns:
        True only when neither ``get_config_path()`` nor ``get_db_path()``
        points at an existing path; False as soon as one of them exists.
    """
    from footprinter.paths import get_config_path, get_db_path

    # An existing config settles the answer; the database path is only
    # consulted when the config is missing.
    if get_config_path().exists():
        return False
    return not get_db_path().exists()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def main(argv=None) -> None:
    """Parse the ``fp`` command line and dispatch to the chosen subcommand.

    Args:
        argv: Argument list without the program name. ``None`` means
            ``sys.argv[1:]``.

    When no subcommand is given, the top-level help is printed (preceded by a
    setup hint on stderr if ``_is_first_run()`` is true) and the function
    returns. A ``ConfigError`` raised by the subcommand is printed to stderr
    and exits with status 1. ``PromptCancelled`` or ``KeyboardInterrupt``
    prints "Cancelled." to stderr and exits with status 130.
    """
    # The interpreter check runs before any footprinter module is imported.
    _check_python_version()

    cli_args = sys.argv[1:] if argv is None else argv

    from footprinter.source_registry import ConfigError

    from footprinter import __version__
    from footprinter.cli._common import FORMATTER

    usage_examples = (
        "getting started:\n"
        " fp setup Run the configuration wizard\n"
        " fp setup --check Validate existing configuration\n"
        " fp connect list Show available data source connectors\n"
        "\n"
        "data commands:\n"
        " fp ingest Run the data ingest pipeline (incremental)\n"
        " fp ingest --full Re-process all data sources\n"
        " fp status Show data counts and system health\n"
        " fp search 'my query' Semantic search across indexed content\n"
        "\n"
        "browse indexed data:\n"
        " fp view files List indexed files\n"
        " fp view folders List indexed folders\n"
        " fp view projects List projects\n"
        " fp view clients List clients\n"
        " fp view chats List chats\n"
        " fp view emails List indexed emails\n"
        " fp view visits List browser history\n"
        "\n"
        "servers:\n"
        " fp mcp Start the MCP server\n"
        " fp api Start the HTTP API server\n"
        " fp mcp check ~/Work/file Check access resolution for a path\n"
        "\n"
        "tip: use 'fp <command> --help' for details on any command."
    )

    parser = argparse.ArgumentParser(
        prog="fp",
        description=f"Footprinter v{__version__} — file archival and AI context CLI",
        epilog=usage_examples,
        formatter_class=FORMATTER,
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    subparsers = parser.add_subparsers(dest="subcommand", metavar="COMMAND")
    # A bare ``fp`` is allowed; it falls through to the help output below.
    subparsers.required = False

    from footprinter.cli import (
        api_cmd,
        connect,
        data,
        delete,
        ingest,
        mcp_cmd,
        search_cmd,
        setup,
        status_cmd,
        upsert,
        vectorize_cmd,
        view,
    )

    # Registration order is kept as-is; each module adds its own subparser.
    command_modules = (
        ingest,
        mcp_cmd,
        api_cmd,
        status_cmd,
        search_cmd,
        setup,
        connect,
        view,
        upsert,
        data,
        delete,
        vectorize_cmd,
    )
    for command_module in command_modules:
        command_module.register(subparsers)

    parsed = parser.parse_args(cli_args)

    if parsed.subcommand is None:
        if _is_first_run():
            first_run_hint = (
                "\033[33;1m\U0001f4a1 Looks like this is your first time running "
                "Footprinter.\n Run 'fp setup' to get started.\033[0m\n"
            )
            print(first_run_hint, file=sys.stderr)
        parser.print_help()
        return

    from footprinter.cli._prompt import PromptCancelled

    try:
        parsed.func(parsed)
    except ConfigError as config_error:
        print(str(config_error), file=sys.stderr)
        sys.exit(1)
    except (PromptCancelled, KeyboardInterrupt):
        print("\nCancelled.", file=sys.stderr)
        sys.exit(130)
|