netlapse 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. netlapse/README.md +880 -0
  2. netlapse/__init__.py +3 -0
  3. netlapse/__main__.py +326 -0
  4. netlapse/api/__init__.py +0 -0
  5. netlapse/api/admin.py +788 -0
  6. netlapse/api/dcim_admin.py +154 -0
  7. netlapse/api/native.py +876 -0
  8. netlapse/api/oxidized_compat.py +341 -0
  9. netlapse/api/parse_audit.py +390 -0
  10. netlapse/app.py +593 -0
  11. netlapse/auth/__init__.py +24 -0
  12. netlapse/auth/hashing.py +79 -0
  13. netlapse/auth/manager.py +231 -0
  14. netlapse/auth/providers.py +483 -0
  15. netlapse/auth/store.py +294 -0
  16. netlapse/auth/web.py +598 -0
  17. netlapse/config_template.py +184 -0
  18. netlapse/core/__init__.py +0 -0
  19. netlapse/core/collector.py +607 -0
  20. netlapse/dcim/__init__.py +14 -0
  21. netlapse/dcim/db_schema.py +2438 -0
  22. netlapse/dcim/dbrepair.py +264 -0
  23. netlapse/dcim/map_importer.py +1060 -0
  24. netlapse/definitions/captures.yaml +84 -0
  25. netlapse/definitions/jobs.yaml +58 -0
  26. netlapse/definitions/roles.yaml +67 -0
  27. netlapse/definitions/volatility.yaml +66 -0
  28. netlapse/parse_test.py +305 -0
  29. netlapse/parser/__init__.py +17 -0
  30. netlapse/parser/engine.py +421 -0
  31. netlapse/parser/tfsm_fire.py +335 -0
  32. netlapse/registry/__init__.py +54 -0
  33. netlapse/registry/cli.py +176 -0
  34. netlapse/registry/loader.py +255 -0
  35. netlapse/registry/models.py +103 -0
  36. netlapse/registry/resolver.py +58 -0
  37. netlapse/registry/service.py +90 -0
  38. netlapse/registry/sync.py +191 -0
  39. netlapse/registry/volatility.py +246 -0
  40. netlapse/scheduler/__init__.py +809 -0
  41. netlapse/ssh/__init__.py +76 -0
  42. netlapse/ssh/client.py +1098 -0
  43. netlapse/ssh/emulation.py +302 -0
  44. netlapse/ssh/executor.py +750 -0
  45. netlapse/ssh/proxy.py +383 -0
  46. netlapse/storage/__init__.py +64 -0
  47. netlapse/storage/backend.py +469 -0
  48. netlapse/storage/config_diff.py +188 -0
  49. netlapse/storage/diff.py +248 -0
  50. netlapse/storage/file_backend.py +668 -0
  51. netlapse/storage/git_backend.py +691 -0
  52. netlapse/storage/health.py +126 -0
  53. netlapse/storage/token_match.py +145 -0
  54. netlapse/vault/__init__.py +95 -0
  55. netlapse/vault/bridge.py +489 -0
  56. netlapse/vault/encryption.py +340 -0
  57. netlapse/vault/models.py +392 -0
  58. netlapse/vault/schema.py +412 -0
  59. netlapse/vault/vault.py +1060 -0
  60. netlapse/web/__init__.py +40 -0
  61. netlapse/web/static/css/admin.css +348 -0
  62. netlapse/web/static/css/netlapse-dark.css +1164 -0
  63. netlapse/web/static/css/netlapse-light.css +1163 -0
  64. netlapse/web/static/css/netlapse.css +1107 -0
  65. netlapse/web/static/css/theme_dark.css +69 -0
  66. netlapse/web/static/css/theme_light.css +67 -0
  67. netlapse/web/static/index.html +117 -0
  68. netlapse/web/static/js/api.js +239 -0
  69. netlapse/web/static/js/app.js +239 -0
  70. netlapse/web/static/js/components.js +239 -0
  71. netlapse/web/static/js/views/audit.js +369 -0
  72. netlapse/web/static/js/views/collection.js +185 -0
  73. netlapse/web/static/js/views/credentials.js +415 -0
  74. netlapse/web/static/js/views/dashboard.js +114 -0
  75. netlapse/web/static/js/views/device.js +1316 -0
  76. netlapse/web/static/js/views/devices.js +349 -0
  77. netlapse/web/static/js/views/jobs.js +633 -0
  78. netlapse/web/static/js/views/onesearch.js +473 -0
  79. netlapse/web/static/js/views/reference.js +330 -0
  80. netlapse/web/static/js/views/search.js +373 -0
  81. netlapse/web/static/js/views/settings.js +237 -0
  82. netlapse/web/static/js/views/templates.js +502 -0
  83. netlapse/web/static/js/views/users.js +397 -0
  84. netlapse/web/static/js/ws.js +136 -0
  85. netlapse-0.1.0.dist-info/METADATA +906 -0
  86. netlapse-0.1.0.dist-info/RECORD +89 -0
  87. netlapse-0.1.0.dist-info/WHEEL +5 -0
  88. netlapse-0.1.0.dist-info/entry_points.txt +2 -0
  89. netlapse-0.1.0.dist-info/top_level.txt +1 -0
netlapse/README.md ADDED
@@ -0,0 +1,880 @@
1
+ # Netlapse
2
+
3
+ An Oxidized replacement, network state time-lapse. Periodic structured snapshots of network device operational state with versioned diffs.
4
+
5
+
6
+ ![Netlapse — network state time-lapse](screenshots/slides.gif)
7
+
8
+ ## New Capabilities in Development
9
+
10
+ **LibreNMS integration (beta).** Netlapse now serves an Oxidized-compatible REST
11
+ API, so LibreNMS renders Netlapse's configs, version history, and diffs under each
12
+ device's **Config** tab — and the **Refresh** button triggers an on-demand
13
+ re-collection. Validated end to end against a live LibreNMS instance.
14
+
15
+ <p align="center">
16
+ <img src="screenshots/LibreNMS/config.png" width="49%" alt="LibreNMS Config tab rendering a Netlapse-served running-config for eng-spine-1" />
17
+ <img src="screenshots/LibreNMS/Diff.png" width="49%" alt="LibreNMS Show diff rendering a Netlapse unified diff between two config versions" />
18
+ </p>
19
+
20
+ See **[Netlapse + LibreNMS Integration](README_LibreNMS_Integration.md)** for
21
+ wiring, authentication modes, and the naming requirements that determine whether
22
+ configs render.
23
+
24
+ **LDAP / Active Directory authentication.** Beyond the built-in local accounts,
25
+ Netlapse authenticates against a directory with `auth.provider: ldap` (or
26
+ `ldap+local` to keep a local break-glass admin). Roles are directory-authoritative
27
+ — group membership maps to admin/viewer and is re-evaluated on every login — and
28
+ each directory user is provisioned as a local shadow row so the rest of the app
29
+ works unchanged. A mock mode boots the full login flow with no domain controller
30
+ for testing. See **[Authentication](README_Auth.md)**.
31
+
32
+ ## Why Netlapse?
33
+
34
+ Oxidized backs up configs as text blobs. It answers "what changed?" with `git diff`.
35
+
36
+ Netlapse captures *any CLI output* (configs, ARP tables, BGP state, routing tables, interface status), parses it into structured JSON, versions both artifacts, and answers "what changed?" with semantic diffs:
37
+
38
+ - **ARP diff**: "12 MACs learned, 3 aged out, 2 moved interfaces"
39
+ - **BGP diff**: "peer 10.0.0.1 went Established → Idle, prefix count dropped 4200 → 0"
40
+ - **Interface diff**: "Gi0/3 MTU changed 9000 → 1500"
41
+ - **Config diff**: same as Oxidized — git text diff. Table stakes.
42
+
43
+
44
+ ## One Search
45
+
46
+ Type one token — an IP, a MAC, an ASN, a hostname, a serial — and get *everything* that references it across the whole fleet, grouped by what it means. It's the structured version of the 2 AM `grep`: instead of grepping one device at a time, you ask the question once and the answer comes back organized.
47
+
48
+ ![One Search — one token across inventory and every capture, grouped by capture type](screenshots/netlapse_themed/one-search.png)
49
+
50
+ A single search returns two answers:
51
+
52
+ - **Identity** — which managed device this token *is*. A management or OOB address, a hostname, a serial, an asset tag resolves to the device that owns it, with a link straight to its detail page.
53
+ - **References** — everywhere it's *seen*, grouped by capture type (config, ARP, MAC table, BGP, routes, version, …) then by device, with the matching lines highlighted. Open any hit to the full capture and copy it whole.
54
+
55
+ The matching is format-aware, because the value never appears the same way twice across a mixed fleet:
56
+
57
+ - **IP** is octet-anchored — searching `10.7.255.35` will not drag in `10.7.255.350` or `210.7.255.35`. The lines it returns are *actually* that address.
58
+ - **MAC** matches every vendor representation from one keystroke — type `d4af.f76c.45ad` and it also finds `d4:af:f7:6c:45:ad`, `d4-af-f7-6c-45-ad`, and the bare `d4aff76c45ad`, wherever they're stored.
59
+ - **ASN** matches with or without the `AS` prefix, so `AS6169` and `remote-as 6169` both surface.
60
+
61
+ Because it reads the captures Netlapse already stores, it has zero runtime coupling to collection — it's a query over the dual-artifact tree, not a second index to keep in sync. Trace a `/31` link subnet to the device that advertises it, pivot from its ARP entry to the chassis MAC in `show version`, and confirm they're the same box — in two searches, with no topology tool and no correlation database underneath.
62
+
63
+ ## Semantic Diff
64
+
65
+ Two snapshots, one question that actually matters at 2 AM: did the network change, or did a timer just tick? Semantic diff answers it by comparing parsed records instead of raw text.
66
+ It matches each record by its identity — an ARP entry by IP, a BGP session by neighbor, an OSPF adjacency by neighbor ID — and reports only meaningful state changes: a MAC relearned on a different interface, a peer dropped to Idle, a route's next-hop moved. Volatile fields that tick every poll — age timers, uptimes, counters — are excluded by default, so the diff stays quiet when nothing operationally significant happened and surfaces the one line that matters when something did.
67
+
68
+ ![Netlapse — Semantic Diff](screenshots/diff/lldp_symantic_diff_complex.png)
69
+
70
+ > **⚠️ Beta Version Notice**
71
+ >
72
+ > Netlapse is **beta** (v0.1.0). It can run in production, but expect rough edges and breaking changes before 1.0.
73
+ >
74
+ > **Authentication.** Sessions are cookie-based; the local backend stores usernames with `scrypt`-hashed passwords, bootstrapped from a single `admin` account (set `NETLAPSE_ADMIN_PASSWORD` on first run). LDAP/Active Directory authentication is also supported (`auth.provider: ldap` or `ldap+local`), with directory-authoritative roles and a local break-glass fallback — see **[Authentication](README_Auth.md)**. The local-only default is adequate for a trusted operator or small team on a trusted network; it is **not** hardened for untrusted or multi-tenant exposure.
75
+ >
76
+ > **LDAP/AD is implemented.** The provider binds against a directory (service-bind → locate → proxy-bind), gates access on group membership, maps `admin_groups` to the admin role, and provisions a local shadow row per directory user. Group membership is re-evaluated on every login (directory-authoritative). A built-in mock mode lets you exercise the full login path with no DC; nested-group expansion is the one documented limitation. See **[Authentication](README_Auth.md)** for sample configs and the break-glass procedure.
77
+
78
+
79
+ ## Data-Driven Collection
80
+
81
+ What Netlapse collects — and how each vendor's CLI says it — is defined in two YAML files, not in code. They live as siblings of `config.yaml` (default `~/.netlapse/`):
82
+
83
+ **`captures.yaml`** — the catalog of collectable artifacts. Each capture is a vendor-neutral intent (an ARP table, a BGP summary) plus the syntax to get it. `command` is either one string for everyone, or a map keyed by platform slug with a reserved `default` fallback:
84
+
85
+ ```yaml
86
+ captures:
87
+ bgp-summary:
88
+ capture_type: bgp
89
+ description: BGP session state
90
+ command:
91
+ default: show ip bgp summary
92
+ juniper_junos: show bgp summary
93
+ interval: 900
94
+ ```
95
+
96
+ **`jobs.yaml`** — bindings: which capture runs against which devices. A binding carries no command syntax and no interval (those come from the capture it references) — only the device selection:
97
+
98
+ ```yaml
99
+ jobs:
100
+ bgp-summary:
101
+ capture: bgp-summary
102
+ name: BGP summary
103
+ enabled: true
104
+ filters:
105
+ platform: cisco_ios,arista_eos,juniper_junos
106
+ ```
107
+
108
+ Adding a vendor, a capture, or a whole new artifact type is a file edit and a restart — no coder in the loop:
109
+
110
+ ```bash
111
+ # edit ~/.netlapse/captures.yaml + jobs.yaml, then:
112
+ python -m netlapse definitions validate # offline, all-or-nothing check
113
+ python -m netlapse definitions sync # project into the jobs table (or just restart)
114
+ ```
115
+
116
+ The new capture type then propagates to the UI through the DB — it appears in the Capture Type selector on every job, with no template or schema work.
117
+
118
+ **Files are authoritative for definitions; the DB owns runtime state.** A sync reconciles the jobs table to the files — insert new, update changed, tombstone removed — but never touches the live enable/disable toggle, the schedule, or job history. Validation runs over the whole catalog at once and is fail-soft: a typo is reported alongside every other problem in one pass, and the sync is skipped entirely rather than half-applied — the daemon keeps running on the last-good definitions already in the DB.
119
+
120
+ The `definitions` CLI rounds it out: `validate`, `sync`, `list`, and `adopt` (which takes over jobs that predate the registry). See [Design Decisions](#design-decisions) for why definitions are file-authoritative while platforms stayed in the DB.
121
+
122
+ ## Project Status
123
+
124
+
125
+ | Layer | Module | Lines | Status |
126
+ |---|---|---|---|
127
+ | **API** | Oxidized compat (9 routes) | 220 | ✅ Wired to DCIM + storage + scheduler |
128
+ | **API** | Native REST (27 routes) | 687 | ✅ Full CRUD + device edit + auth test + scheduler triggers + search |
129
+ | **DCIM** | NetBox-aligned SQLite schema (v6) | 1,439 | ✅ Jobs, devices, history, CRUD, per-platform commands, auto-migration |
130
+ | **DCIM** | SC2 map importer | 817 | ✅ Topology → DCIM sync, idempotent, full hostname preservation |
131
+ | **Storage** | File backend (directory tree) | 417 | ✅ Dual artifacts (raw + parsed JSON), last-N rotation |
132
+ | **Storage** | Git backend (versioned + trailers) | 738 | ✅ Tested |
133
+ | **SSH** | Client (Paramiko wrapper) | 817 | ✅ Ported from SC2, key auth, legacy algorithm + pubkey signature handling |
134
+ | **SSH** | Emulation shim (NetEmulate) | 302 | ✅ 82/82 devices verified |
135
+ | **SSH** | Executor (DCIM → SSH → Snapshot) | 602 | ✅ Built, auth test, device-level legacy override |
136
+ | **Vault** | Credential vault (Fernet/PBKDF2) | 2,142 | ✅ Ported from SC2, headless unlock, key + password auth |
137
+ | **Vault** | DCIM bridge + credential resolution | 350 | ✅ Vault ↔ executor integration |
138
+ | **Core** | Collection pipeline (DCIM→vault→SSH→disk) | 502 | ✅ End-to-end proven, \r\n normalization |
139
+ | **Core** | CLI (sync-map, vault, collect) | 232 | ✅ Subcommand dispatch |
140
+ | **Parser** | tfsm-fire engine (TextFSM auto-template) | 232 | ✅ Ported from SC2.5, thread-safe |
141
+ | **Parser** | Parse engine (clean + score + enrich) | 392 | ✅ Output cleaning, filter cascade, vendor fallback |
142
+ | **Parser** | Template database | 296 templates | ✅ Arista (48), Cisco IOS (143), Juniper (22), NX-OS, ASA |
143
+ | **Scheduler** | Async job loop + WS broadcast + parsing | 571 | ✅ Running in production, inline tfsm-fire, per-platform command resolution |
144
+ | **Web** | Dashboard (FastAPI + vanilla JS) | 3,880 | ✅ Live — 6 views, job CRUD, device edit + auth test, parsed data tables, capture type selector |
145
+
146
+ ```
147
+ netlapse/
148
+ ├── __init__.py (3) Package version (0.1.0)
149
+ ├── __main__.py (287) CLI: serve, sync-map, vault (init/add-ssh/delete/list/assign), collect
150
+ ├── app.py (255) FastAPI app, lifespan, vault unlock, parser init, scheduler start
151
+ ├── parse_test.py (305) Single-device collect + parse diagnostic tool
152
+ ├── api/
153
+ │ ├── native.py (687) 27 native REST routes at /api/v1/
154
+ │ └── oxidized_compat.py (220) 9 Oxidized-compatible routes at /
155
+ ├── core/
156
+ │ └── collector.py (502) End-to-end collection pipeline, \r\n normalization
157
+ ├── dcim/
158
+ │ ├── db_schema.py (1439) SQLite schema v6, views, queries, CRUD, device edit, auto-migration
159
+ │ └── map_importer.py (817) SC2 topology map → DCIM sync, hostname.site preservation
160
+ ├── parser/
161
+ │ ├── __init__.py (17) Package exports: ParseEngine, ParseResult
162
+ │ ├── tfsm_fire.py (232) TextFSMAutoEngine — template matching + scoring (from SC2.5)
163
+ │ └── engine.py (392) ParseEngine — output cleaning, filter cascade, vendor fallback
164
+ ├── scheduler/
165
+ │ └── __init__.py (571) ConnectionManager + Scheduler (poll loop, queue, WS broadcast, inline parsing, per-platform commands)
166
+ ├── storage/
167
+ │ ├── backend.py (280) Abstract interface + Snapshot/DiffResult
168
+ │ ├── file_backend.py (417) Directory tree, last-N rotation, multi-type search, parsed JSON write-through
169
+ │ └── git_backend.py (738) Git versioning, commit trailers, structured diffs
170
+ ├── ssh/
171
+ │ ├── emulation.py (302) NetEmulate shim (standalone, reusable)
172
+ │ ├── client.py (817) SSHClient with DCIM platform field mapping, legacy pubkey handling
173
+ │ └── executor.py (602) v_device_detail → SSHClient → Snapshot[], auth test with debug capture
174
+ ├── vault/
175
+ │ ├── encryption.py (339) PBKDF2 key derivation + Fernet encryption
176
+ │ ├── models.py (391) SSH/SNMP credential dataclasses
177
+ │ ├── schema.py (353) Vault SQLite schema + DatabaseManager
178
+ │ ├── vault.py (1059) CredentialVault — CRUD, encrypt/decrypt
179
+ │ └── bridge.py (350) Headless unlock, DCIM↔vault integration
180
+ └── web/
181
+ ├── __init__.py (26) Web router — serves / and mounts /static
182
+ └── static/
183
+ ├── index.html (79) SPA shell: sidebar, content area, module loader
184
+ ├── css/
185
+ │ └── netlapse.css (1032) CSS custom properties, full component library + forms + modal
186
+ └── js/
187
+ ├── app.js (154) Hash router, view lifecycle, health polling
188
+ ├── api.js (86) All /api/v1 + Oxidized + native search + job CRUD + device edit + auth test
189
+ ├── components.js(125) Shared: badges, stat cards, formatters, icons (edit, toggle, trash)
190
+ ├── ws.js (136) WebSocket manager with auto-reconnect
191
+ └── views/
192
+ ├── dashboard.js (114) Health stats, site grid, recent jobs
193
+ ├── devices.js (126) Filterable inventory, URL param pre-filtering
194
+ ├── device.js (834) 4 tabs, capture type pills, parsed data, device edit modal, auth test
195
+ ├── jobs.js (592) Job CRUD with per-platform command map editor
196
+ ├── search.js (331) Multi-type regex search, detail modal, match nav
197
+ └── collection.js (185) Job selector, WebSocket progress, live log
198
+ ```
199
+
200
+ ## Quick Start
201
+
202
+ ```bash
203
+ pip install netlapse
204
+ ```
205
+
206
+ ### Import devices from Secure Cartography topology map
207
+
208
+ ```bash
209
+ # Dry run — parse and report, no writes
210
+ python -m netlapse sync-map /path/to/map.json --dry-run
211
+
212
+ # Import — creates sites, assigns roles, detects platforms
213
+ python -m netlapse sync-map /path/to/map.json
214
+
215
+ # Exclude OOB management switches
216
+ python -m netlapse sync-map /path/to/map.json --exclude-prefix oob
217
+ ```
218
+
219
+ The map importer supports two hostname conventions:
220
+
221
+ | Convention | Example | Device Name | Site |
222
+ |---|----------------------------|-----------------|---------|
223
+ | Dot-separated (datacenter/SP) | `border01.site1.company.com` | `border01.site1` | `site1` |
224
+ | Dot-separated (datacenter/SP) | `peer1-01.site1` | `peer1-01.site1` | `site1` |
225
+ | Dash-prefixed (campus/enterprise) | `den-core-01` | `den-core-01` | `den` |
226
+
227
+ Device names preserve the operational hostname — domain suffixes are stripped but the site segment stays because it's part of the device identity (`border01.site1` and `border01.den2` are different devices).
228
+
229
+ Roles are inferred from hostname patterns (`border*` → router, `tor*` → leaf, `-core-` → core, `-sw-` → access). Platforms are parsed from SC2's discovery strings (`Arista DCS-7280SRA-48C6-F EOS 4.33.1.1F` → `arista_eos`, `Cisco IOS-XE 17.03.06` → `cisco_ios_xe`, `Juniper JUNOS 23.2R1-S2.5` → `juniper_junos`). Handles both short (`Arista EOS 4.33.1.1F`) and full chassis (`Arista DCS-7280SRA-48C6-F EOS 4.33.1.1F`) platform strings. Idempotent — safe to re-run after every SC2 discovery cycle.
230
+
231
+ ### Initialize credential vault and collect
232
+
233
+ ```bash
234
+ # Initialize vault with master password
235
+ python -m netlapse vault init
236
+
237
+ # Add SSH credentials (password auth)
238
+ python -m netlapse vault add-ssh lab -u admin -p admin --default
239
+
240
+ # Add SSH credentials (key auth)
241
+ python -m netlapse vault add-ssh prod -u scott -k ~/.ssh/id_ed25519 --default
242
+
243
+ # Add SSH credentials (key + password for enable)
244
+ python -m netlapse vault add-ssh prod -u scott -k ~/.ssh/id_rsa -p 'enable_pass' --default
245
+
246
+ # Assign credential to all devices
247
+ python -m netlapse vault assign prod
248
+
249
+ # Verify
250
+ python -m netlapse vault list
251
+
252
+ # Delete a credential
253
+ python -m netlapse vault delete lab
254
+
255
+ # Collect configs (one-shot CLI)
256
+ export NETLAPSE_VAULT_PASSWORD="your_master_password"
257
+ python -m netlapse collect --site den
258
+
259
+ # Collect against NetEmulate (emulated devices)
260
+ python -m netlapse collect --site den --emulate
261
+
262
+ # Run a named job
263
+ python -m netlapse collect --job config-backup
264
+
265
+ # Start the daemon (scheduler + API + Web UI)
266
+ python -m netlapse
267
+ ```
268
+
269
+ The vault stores key file contents encrypted — the original file isn't referenced at runtime, so the daemon doesn't need filesystem access to the key.
270
+
271
+ Browse the Web UI at `http://localhost:8888` or the Swagger API docs at `http://localhost:8888/docs`.
272
+
273
+ ### First scheduled collection (65/65 against NetEmulate)
274
+
275
+ ```
276
+ $ python -m netlapse
277
+ Netlapse v0.1.0 starting
278
+ Storage backend: FileBackend at /home/user/.netlapse/data
279
+ Emulation enabled: 1738 device IPs loaded
280
+ Vault unlocked from NETLAPSE_VAULT_PASSWORD env var
281
+ Scheduler started (poll=15s, workers=2)
282
+ Netlapse ready — http://0.0.0.0:8888
283
+
284
+ Job 'config-backup': collecting from 65 devices (trigger=scheduled, history=1)
285
+ den-core-01: show running-config — 35361 bytes in 0.1s
286
+ den-core-02: show running-config — 33057 bytes in 0.1s
287
+ den-2-sw-01: show running-config — 47442 bytes in 0.1s
288
+ ...
289
+ Job 'config-backup' complete: 65/65 in 42.3s (history=1)
290
+ ```
291
+
292
+ ## Architecture
293
+
294
+ ```
295
+ ┌─────────────────────────────────────────────────────┐
296
+ │ FastAPI Application (port 8888) │
297
+ │ ├── /nodes, /node/* → Oxidized compat API │
298
+ │ ├── /api/v1/* → Native Netlapse API │
299
+ │ ├── /ws → WebSocket (live progress) │
300
+ │ └── / → Web UI (vanilla JS) │
301
+ └────────────────────┬────────────────────────────────┘
302
+
303
+ ┌────────────────────▼─────────────────────────────────┐
304
+ │ Scheduler (asyncio + ThreadPoolExecutor) │
305
+ │ ├── Poll loop: get_due_jobs() every 15s │
306
+ │ ├── Job queue: scheduled + API triggers │
307
+ │ ├── Worker threads: collect_device() per device │
308
+ │ ├── WS broadcast: per-device progress to all clients│
309
+ │ └── History: job_history + job_device_results │
310
+ └────────────────────┬─────────────────────────────────┘
311
+
312
+ ┌────────────────────▼─────────────────────────────────┐
313
+ │ Collection Engine │
314
+ │ ├── Collector (DCIM → vault → executor → storage) │
315
+ │ ├── SSH Client (Paramiko, legacy device support) │
316
+ │ ├── Emulation shim (NetEmulate mock devices) │
317
+ │ ├── Parse engine (tfsm-fire, 296 templates) │
318
+ │ └── Credential vault (Fernet/PBKDF2 encrypted) │
319
+ └────────────────────┬─────────────────────────────────┘
320
+
321
+ ┌────────────────────▼─────────────────────────────────┐
322
+ │ Storage Layer │
323
+ │ ├── Git backend (raw text + parsed JSON per commit) │
324
+ │ ├── File backend (directory tree, last-N rotation) │
325
+ │ ├── DCIM SQLite (devices, jobs, history) │
326
+ │ └── Vault SQLite (encrypted credentials, separate) │
327
+ └──────────────────────────────────────────────────────┘
328
+ ```
329
+
330
+ ### Scheduler Data Flow
331
+
332
+ ```
333
+ app.py lifespan → DB → Storage → Vault unlock → Parser init → Emulation → Scheduler.start()
334
+
335
+ ┌───────────────────────────────────────────────────────────┘
336
+
337
+ ├── Poll loop (asyncio, main thread)
338
+ │ └── get_due_jobs(now) → queue.put(("scheduled", slug))
339
+
340
+ ├── API triggers (async, main thread)
341
+ │ ├── POST /api/v1/jobs/{slug}/run → scheduler.enqueue_job()
342
+ │ ├── POST /api/v1/collect/{id} → scheduler.enqueue_device()
343
+ │ └── GET/PUT /node/next/{node} → scheduler.enqueue_device()
344
+
345
+ └── Queue consumer → ThreadPoolExecutor (worker thread)
346
+ ├── NetlapseDB(db_path) # thread-local DB connection
347
+ ├── db.start_job_run() # job_history row (status=running)
348
+ ├── for device in targets:
349
+ │ ├── collect_device() # SSH → Paramiko → raw output
350
+ │ ├── parser.enrich_snapshot() # tfsm-fire → parsed records + score
351
+ │ ├── _record_result() # store snapshot + update device status
352
+ │ ├── db.insert_device_result() # per-device history row
353
+ │ └── _broadcast() # WS events → collection view
354
+ ├── db.complete_job_run() # finalize counts + status
355
+ └── db.update_job_schedule() # next_run = now + interval
356
+ ```
357
+
358
+ ### End-to-End Data Flow
359
+
360
+ ```
361
+ CLI: netlapse collect --site den --emulate
362
+
363
+
364
+ Map importer: SC2 map.json → DCIM (sites, platforms, roles, devices)
365
+ │ Hostname parsing: border01.site1 → site=site1, den-core-01 → site=den
366
+ │ Platform parsing: "Cisco IOS-XE 17.03.06" → cisco_ios_xe
367
+ │ Role inference: border* → router, tor* → leaf, -core- → core
368
+
369
+
370
+ Vault: unlock from NETLAPSE_VAULT_PASSWORD env var
371
+ │ PBKDF2-HMAC-SHA256 (480,000 iterations) → Fernet key derivation
372
+ │ resolve_shared_credentials() → (username, password) tuple
373
+
374
+
375
+ Collector: list_collection_targets(site_filter="den") → 13 devices
376
+
377
+
378
+ Executor: build_ssh_config(v_device_detail row + credentials)
379
+ │ Maps dcim_platform fields → SSHClientConfig:
380
+ │ primary_ip4 → host
381
+ │ ssh_port → port
382
+ │ platform_profile → (used by tfsm-fire for template matching)
383
+ │ paging_disable_command → single platform-specific command
384
+ │ prompt_regex → prompt detection override
385
+ │ enable_command → enter privileged mode
386
+ │ legacy_ssh → device override > platform default > off
387
+ │ (DH group1, 3DES, forced ssh-rsa signatures)
388
+
389
+
390
+ Emulation shim: 172.16.48.60:22 → 127.0.0.1:10224 (den-core-01)
391
+ │ 1738 IPs loaded from ip_lookup.json
392
+ │ DNS intercept patches socket.getaddrinfo
393
+
394
+
395
+ SSHClient: connect → find_prompt → disable_pagination → show running-config
396
+ │ Prompt detected: "den-core-01#"
397
+ │ Output captured: 35,361 bytes in 0.1s
398
+
399
+
400
+ Parser: ParseEngine.enrich_snapshot(snapshot, platform_profile="arista_eos")
401
+ │ _clean_output: strip preamble, find last hostname echo, take output after
402
+ │ _build_filter: "arista_eos_show_ip_arp" → 1 matching template
403
+ │ find_best_template: score 80.2 → arista_eos_show_ip_arp
404
+ │ Snapshot.parsed_data = { records: [...] }
405
+ │ Snapshot.template_name = "arista_eos_show_ip_arp"
406
+ │ (vendor fallback if specific filter misses)
407
+
408
+
409
+ Storage: store_batch(snapshots)
410
+ │ → ~/.netlapse/data/site1/peer1-01.site1/arp.txt (raw CLI output)
411
+ │ → ~/.netlapse/data/site1/peer1-01.site1/arp.json (parsed records + template metadata)
412
+
413
+
414
+ DCIM: update_device_collection_status(device_id, "success", timestamp)
415
+ ```
416
+
417
+ ## What's Built — Module Details
418
+
419
+ ### Scheduler (`scheduler/__init__.py`)
420
+
421
+ Two components in one module:
422
+
423
+ **ConnectionManager** — WebSocket broadcast hub. Tracks connected clients, broadcasts JSON events to all. The `/ws` endpoint in `app.py` adds/removes connections; the scheduler broadcasts.
424
+
425
+ **Scheduler** — asyncio-based job loop backed by a `ThreadPoolExecutor` for blocking SSH work. Poll loop checks `get_due_jobs()` every 15 seconds (configurable). API routes push ad-hoc triggers onto an `asyncio.Queue`. Worker threads create their own SQLite connections (thread-local — SQLite connections can't cross thread boundaries). Each device collection produces a WebSocket event, giving the frontend real-time progress.
426
+
427
+ WebSocket events emitted during collection:
428
+
429
+ | Event | Payload | When |
430
+ |---|---|---|
431
+ | `collection_start` | `{ job, device_count, history_id }` | Job begins |
432
+ | `device_collected` | `{ device, status, command, commands, platform, bytes, duration, parsed, template }` | Each device completes |
433
+ | `collection_progress` | `{ collected, total }` | After each device |
434
+ | `collection_complete` | `{ job, collected, failed, device_count, duration, history_id }` | Job finishes |
435
+
436
+ The Jobs view auto-refreshes every 10 seconds and instantly on `collection_start`/`collection_complete` events. The Collection view shows per-device live progress with a progress bar and scrolling log.
437
+
438
+ ### DCIM Schema (`dcim/db_schema.py`)
439
+
440
+ Single SQLite database at `~/.netlapse/netlapse.db`. Schema version 10 (auto-migrates from v3 through v10). WAL mode for concurrent reads from the API while the scheduler writes.
441
+
442
+ **Tables:**
443
+
444
+ | Table | Purpose |
445
+ |---|---|
446
+ | `dcim_site` | Physical locations. `slug` = Oxidized group = git directory = one namespace everywhere |
447
+ | `dcim_manufacturer` | Hardware vendors (8 seeded: Cisco, Arista, Juniper, Palo Alto, Fortinet, F5, HP, Dell) |
448
+ | `dcim_platform` | OS/software. Each platform carries SSH behavior fields: `platform_profile`, `paging_disable_command`, `enable_command`, `prompt_regex`, `legacy_ssh` |
449
+ | `dcim_device_role` | Functional roles (12 seeded: router through border) |
450
+ | `dcim_device` | Devices. `credential_id`, `collection_enabled`, `legacy_ssh` (device override), `last_collection_status`, `credential_tested_at`, `credential_test_result` |
451
+ | `jobs` | Persistent job definitions: capture type, `commands` (JSON default), `command_map` (per-platform JSON, projected from `captures.yaml` for file-managed jobs), device filters, schedule, and `source` (`file` = catalog-owned/managed by sync; `seed` = shipped default, editable until a matching binding adopts it; NULL/`api` = created via API/UI) |
452
+ | `job_history` | Per-run execution records: trigger, status, counts, timing |
453
+ | `job_device_results` | Per-device outcome within a job run: status, error category, duration |
454
+
455
+ **Views:** `v_device_detail` (full join of device + site + platform + manufacturer + role + credential, includes both `device_legacy_ssh` and platform `legacy_ssh`), `v_site_summary`, `v_platform_summary`, `v_job_summary` (27 columns — all job fields including `command_map` + latest history with computed duration).
456
+
457
+ **Job CRUD methods:** `create_job()`, `update_job()` (field whitelist protects schedule fields), `delete_job()` (history preserved via FK SET NULL), `set_job_enabled()`.
458
+
459
+ **Device CRUD methods:** `update_device()` (14-field whitelist, diff-only updates, FK null normalization, legacy_ssh tri-state), `list_credentials()` (safe — no decrypted material), `list_roles()`, `list_platforms()`.
460
+
461
+ **Job history methods:** `start_job_run()` → history_id, `complete_job_run()`, `insert_device_result()`, `list_job_history()`, `get_job_run()` (includes nested device results).
462
+
463
+ **13 platforms with SSH behavior seeded.** The `slug` is what devices reference and what `captures.yaml` per-platform override keys are matched against; `profile` is the tfsm-fire template-matching key (the two differ on a few platforms, so use the slug when authoring overrides):
464
+
465
+ | Platform | Slug | Profile | Paging Command | Enable | Legacy |
466
+ |---|---|---|---|---|---|
467
+ | Cisco IOS | `cisco_ios` | `cisco_ios` | `terminal length 0` | `enable` | |
468
+ | Cisco IOS-XE | `cisco_ios_xe` | `cisco_xe` | `terminal length 0` | `enable` | |
469
+ | Cisco IOS-XR | `cisco_ios_xr` | `cisco_xr` | `terminal length 0` | | |
470
+ | Cisco NX-OS | `cisco_nxos` | `cisco_nxos` | `terminal length 0` | | |
471
+ | Cisco ASA | `cisco_asa` | `cisco_asa` | `terminal pager 0` | `enable` | |
472
+ | Arista EOS | `arista_eos` | `arista_eos` | `terminal length 0` | | |
473
+ | Juniper Junos | `juniper_junos` | `juniper_junos` | `set cli screen-length 0` | | |
474
+ | Palo Alto PAN-OS | `paloalto_panos` | `paloalto_panos` | `set cli pager off` | | |
475
+ | Fortinet FortiOS | `fortinet_fortios` | `fortinet` | `config system console\nset output standard\nend` | | |
476
+ | F5 TMOS | `f5_tmos` | `f5_tmsh` | `modify cli preference pager disabled` | | |
477
+ | HP ProCurve | `hp_procurve` | `hp_procurve` | `no page` | | ✓ |
478
+ | HP Comware | `hp_comware` | `hp_comware` | `screen-length disable` | | ✓ |
479
+ | Dell OS10 | `dell_os10` | `dell_os10` | `terminal length 0` | | |
480
+
481
+ **8 default jobs seeded** (as `source='seed'` — editable and collecting out of the box; sync promotes them to `file` if/when a `jobs.yaml` binding with the same slug is synced):
482
+
483
+ | Job | Capture Type | Commands | Interval |
484
+ |---|---|---|---|
485
+ | Config Backup | `config` | `show running-config` | 1 hour |
486
+ | ARP Table | `arp` | `show ip arp` | 30 min |
487
+ | BGP Summary | `bgp` | `show ip bgp summary` | 15 min |
488
+ | Interface Status | `interfaces` | `show interfaces` | 30 min |
489
+ | Route Table | `routes` | `show ip route` | 30 min |
490
+ | OSPF Neighbors | `ospf` | `show ip ospf neighbor` | 30 min |
491
+ | MAC Address Table | `mac` | `show mac address-table` | 30 min |
492
+ | LLDP Neighbors | `lldp` | `show lldp neighbors detail` | 30 min |
493
+
494
+ Per-platform command overrides (Junos `show configuration | display set`, Arista `show arp`, etc.) live in `captures.yaml` and are projected into each job's `command_map` at sync time.
495
+
496
+ ### Map Importer (`dcim/map_importer.py`)
497
+
498
+ Imports device inventory from Secure Cartography topology maps — the output of SC2's BFS discovery. The network is the source of truth. NetBox coexistence is optional.
499
+
500
+ The importer follows the NetAudit pattern: the patrol/topology map is the seed source. It creates sites and roles as needed, upserts devices (updates IP/platform if changed, inserts if new), and flags devices that may need legacy SSH algorithms.
501
+
502
+ ```bash
503
+ # CLI
504
+ python -m netlapse sync-map /path/to/map.json
505
+ python -m netlapse sync-map /path/to/map.json --exclude-prefix oob
506
+ python -m netlapse sync-map /path/to/map.json --site override-slug --dry-run
507
+ ```
508
+
509
+ ```python
510
+ # Programmatic
511
+ from netlapse.dcim.map_importer import sync_from_map
512
+ result = sync_from_map(db, "/path/to/map.json")
513
+ print(f"Created {result.created}, updated {result.updated}, skipped {result.skipped}")
514
+ ```
515
+
516
+ ### Credential Vault (`vault/`)
517
+
518
+ Ported from Secure Cartography's credential vault. Separate encrypted SQLite database at `~/.netlapse/vault.db` — not in the DCIM DB (correct security boundary).
519
+
520
+ **Encryption:** PBKDF2-HMAC-SHA256 (480,000 iterations) for key derivation, Fernet (AES-128-CBC + HMAC-SHA256) for symmetric encryption. Salt randomly generated per vault initialization.
521
+
522
+ **Credential types:** SSH (username + password and/or private key), SNMPv2c (community string), SNMPv3 (USM with auth/priv protocols). SSH is the primary path for collection; SNMP support is carried forward for future use.
523
+
524
+ **Headless unlock:** Set `NETLAPSE_VAULT_PASSWORD` env var for daemon/unattended operation. The bridge module (`vault/bridge.py`) auto-unlocks on first access. The app lifespan unlocks the vault before starting the scheduler.
525
+
526
+ **DCIM integration:** The bridge resolves `credential_id` from `dcim_device` → vault lookup → `(username, password)` tuple for the executor. `assign_credential_to_all()` bulk-assigns a named credential to matching devices.
527
+
528
+ ```bash
529
+ python -m netlapse vault init # Initialize with master password
530
+ python -m netlapse vault add-ssh lab -u admin -p admin --default # Add SSH credential
531
+ python -m netlapse vault list # List credentials (no secrets shown)
532
+ python -m netlapse vault assign lab # Assign to all devices
533
+ python -m netlapse vault assign lab --site den # Assign to one site
534
+ ```
535
+
536
+ ### Collection Pipeline (`core/collector.py`)
537
+
538
+ The bridge between "we have devices and credentials" and "config backups land on disk." Two entry points converge on the same pipeline:
539
+
540
+ - **`collect_now()`** — ad-hoc collection from CLI or API trigger. Specify filters, commands, and credential name.
541
+ - **`run_job()`** — job-based collection from the jobs table. Resolves job definition, target devices, and schedule.
542
+
543
+ Both resolve credentials from the vault, call the executor, store results via the storage backend, and update DCIM collection history. Line endings are normalized (`\r\n` → `\n`) before storage — devices send Windows-style line endings, Netlapse stores Unix-only. The scheduler calls `collect_device()` directly for per-device granularity in history and WebSocket broadcast.
544
+
545
+ ```bash
546
+ # Ad-hoc collection
547
+ python -m netlapse collect --site den --emulate
548
+ python -m netlapse collect --role router --credential prod-ssh
549
+
550
+ # Job-based collection
551
+ python -m netlapse collect --job config-backup
552
+ python -m netlapse collect --job arp-table --site site1
553
+ ```
554
+
555
+ ### Storage Layer (`storage/`)
556
+
557
+ Abstract interface defines `Snapshot`, `StoredVersion`, `DiffResult` data classes and the method contract. Factory function `create_backend(config)` reads `storage.backend: file|git`.
558
+
559
+ **File backend** stores snapshots as `{site}/{device}/{capture_type}.txt` with last-N rotation into a `history/` subdirectory. Supports regex search across any capture type with line-number tracking and dynamic capture-type discovery. Configs are landing here in the current working state.
560
+
561
+ **Git backend** commits per-device with machine-parseable trailers:
562
+ ```
563
+ X-Netlapse-Device: border-rtr-01
564
+ X-Netlapse-Site: site1
565
+ X-Netlapse-Trigger: scheduled
566
+ X-Netlapse-Types: config,arp,bgp
567
+ ```
568
+
569
+ Trailers survive `git clone`, `git bundle`, and repo migrations — no side-car database required.
570
+
571
+ **Structured diff engine** matches parsed JSON records by capture-type-specific key fields:
572
+
573
+ | Capture Type | Key Field(s) |
574
+ |---|---|
575
+ | arp | ADDRESS |
576
+ | mac | DESTINATION_ADDRESS, VLAN |
577
+ | bgp | NEIGHBOR or BGP_NEIGH |
578
+ | ospf | NEIGHBOR_ID |
579
+ | interfaces | INTERFACE or INTF |
580
+ | routes | NETWORK or PREFIX |
581
+ | vlans | VLAN_ID |
582
+ | spanning, cdp, lldp | INTERFACE + NEIGHBOR |
583
+ | inventory | NAME, PID |
584
+
585
+ Falls back to full-record hash comparison when no key field is recognized.
586
+
587
+ ### Parser (`parser/`)
588
+
589
+ Structured CLI output parsing via tfsm-fire — the "output selects template" paradigm. Ported from Secure Cartography v2.5. The parse engine finds the best TextFSM template for raw CLI output automatically based on the output's structure, not manual template selection.
590
+
591
+ **Architecture:** Three layers —
592
+
593
+ 1. **`tfsm_fire.py`** (from SC2.5) — `TextFSMAutoEngine`. Thread-safe (thread-local SQLite connections). Scores each candidate template on four factors — record count (0–90), field richness (0–90), population rate (0–25), and consistency (0–15) — and selects the highest. The Template Lab normalizes the total to a 0–100 match score for display.
594
+ ![Template tester: a raw capture run through tfsm-fire, the engine auto-selecting the matching TextFSM template and rendering parsed records with the match score — the "output selects template" inversion in action](screenshots/netlapse_themed/template_lab_route_parsed.png)
595
+ 2. **`engine.py`** — `ParseEngine`. Wraps the engine with output cleaning, filter string construction, and a two-stage parse strategy:
596
+ - **Specific filter**: `{platform_profile}_{command}` (e.g. `arista_eos_show_ip_arp`) — tries 1-3 templates, fast.
597
+ - **Vendor fallback**: `{vendor}` only (e.g. `arista`) — tries all vendor templates when the command name doesn't align with the template naming convention.
598
+
599
+ 3. **`tfsm_templates.db`** — 1275 TextFSM templates in a SQLite database. 48 Arista, 143 Cisco IOS, 22 Juniper, plus NX-OS, ASA, and others.
600
+ ![Template browser: the 296-template tfsm-fire library browsable in-app, filterable by platform/vendor, each entry showing its template name and fields — the parser's matching surface made visible](screenshots/netlapse_themed/template_browser.png)
601
+ ![Device detail → Parsed Data tab: CDP neighbors rendered as a sortable table (neighbor, mgmt address, platform, interfaces, capabilities) with the auto-selected template cisco_ios_show_cdp_neighbors_detail and its match score](screenshots/netlapse_themed/parsed_cdp.png)
602
+
603
+ **Output cleaning** is critical — SSH session captures include command echo, banners, pagination responses, and trailing prompts that TextFSM can't handle. The cleaner uses a two-strategy approach:
604
+ - **Primary**: Find the LAST hostname-prefixed command echo (e.g. `router#show ip arp`, `user@switch> show arp`) and take everything after it.
605
+ - **Fallback**: Strip known preamble patterns (bare command lines, JUNOS version banners, `terminal length`, `set cli screen-length`, empty lines).
606
+
607
+ **Integration point:** The scheduler calls `parser.enrich_snapshot(snapshot, platform_profile)` after SSH collection and before storage. Both raw text and parsed JSON are written to disk as dual artifacts (`arp.txt` + `arp.json`).
608
+
609
+ **Diagnostic tool:** `parse_test.py` runs the full pipeline for a single device with verbose step-by-step output:
610
+
611
+ ```bash
612
+ # Full pipeline: SSH → clean → parse → store
613
+ python -m netlapse.parse_test peer1-01 --capture arp --emulate
614
+
615
+ # Skip SSH, test against a saved raw file
616
+ python -m netlapse.parse_test border01 --raw-file /tmp/border01-arp.txt --capture arp
617
+
618
+ # Lower threshold to see low-confidence matches
619
+ python -m netlapse.parse_test border01 --capture bgp --min-score 1
620
+ ```
621
+
622
+ ### SSH Module (`ssh/`)
623
+
624
+ Ported from Secure Cartography v2, split into three modules.
625
+
626
+ **`ssh/emulation.py`** — NetEmulate integration as a standalone module. Four resolution strategies for IP→mock device mapping: exact IP match → DNS resolution → FQDN-strip → hostname reverse-scan. Includes DNS intercept (monkey-patches `socket.getaddrinfo`). Enable once at startup (via CLI `--emulate` or config.yaml `emulation.enabled`), every SSHClient connection transparently redirects.
627
+
628
+ **`ssh/client.py`** — Paramiko wrapper. Invoke-shell only (required for most network devices). ANSI sequence filtering, prompt detection, RSA/Ed25519/ECDSA key loading from vault (PEM strings, not file paths at runtime), platform-specific pagination disable, enable mode entry. `SSHClientConfig` dataclass maps 1:1 to `dcim_platform` fields.
629
+
630
+ `LegacySSHSupport` auto-registers all available kex and host key handlers into Paramiko's `Transport._kex_info` and `Transport._key_info` dicts at first connection. Paramiko 3.x+ (especially on Python 3.14) ships with incomplete handler dictionaries — algorithm names are offered during negotiation but their handler classes aren't registered, causing `KeyError` on `connect()`. The registration discovers every kex/key class Paramiko ships via `importlib`, registers what's missing, and builds preference lists from only what's actually registered. Runs once per process, idempotent. Handles mixed fleets without retry or fallback logic: modern devices negotiate curve25519/ecdh, legacy Cisco-1.25 devices fall back to DH group1/3DES, and OpenSSH 6.x servers with only ssh-rsa host keys still connect. When `legacy_mode` is active, also disables `rsa-sha2-512` and `rsa-sha2-256` pubkey signature algorithms to force `ssh-rsa` (SHA-1) — required for pre-2014 SSH servers that don't support RFC 8332 or advertise `server-sig-algs`.
631
+
632
+ **`ssh/executor.py`** — The bridge. `build_ssh_config()` takes a `v_device_detail` row and credentials, produces an `SSHClientConfig`. Device-level `legacy_ssh` overrides platform default via `_resolve_legacy_ssh()`. `collect_device()` connects, detects prompt, disables pagination, optionally enters enable mode, runs commands, and returns `DeviceResult` with `Snapshot` objects. `test_device_auth()` connects and disconnects without commands, capturing the full Paramiko negotiation trace via a temporary log handler — returns `AuthTestResult` with transport metadata (SSH banner, KEX algorithm, cipher, auth method) and debug log. 12 error categories: `connection_refused`, `connection_timeout`, `auth_failure`, `host_unreachable`, `dns_failure`, `ssh_protocol`, `shell_timeout`, `prompt_detection`, `command_timeout`, `command_error`, `emulation_miss`, `unknown`. Consecutive-failure circuit breaker for batch runs.
633
+
634
+ ### Web UI (`web/`)
635
+
636
+ Single-page application served by FastAPI at `/`. Vanilla JS with ES modules — no build step, no bundler, no framework. ~3,300 lines across 13 files. Tested live against 82 devices across 3 data centers.
637
+
638
+ **Architecture:** One HTML shell loads `app.js`, which manages a hash router and dynamically imports each view module on demand. Every view implements the same lifecycle contract: `render()` returns an HTML string, `init()` fetches data and wires events after the HTML is in the DOM, `destroy()` cleans up timers and WebSocket subscriptions when navigating away.
639
+
640
+ **Views:**
641
+
642
+ | View | Route | API Endpoints | Purpose |
643
+ |---|---|---|---|
644
+ | Dashboard | `#/dashboard` | `/health`, `/sites`, `/jobs` | Stat cards, site grid with device counts, recent collections table |
645
+ | Devices | `#/devices` | `/devices`, `/sites` | Filterable inventory table — search, site, status filters. URL param pre-filtering (`#/devices?site=site1`) |
646
+ | Device Detail | `#/device/{id}` | `/devices/{id}`, `PATCH /devices/{id}`, `/devices/{id}/test-auth`, `/snapshots`, `/snapshots/latest`, `/diff`, `/search/capture_types`, `/platforms`, `/roles`, `/credentials`, `/sites` | Four tabs with capture type pill selector. Raw output per type, parsed data table with click-to-sort columns and status color-coding, snapshot timeline, semantic diff per capture type. Device edit modal (identity, collection, metadata) with credential override, legacy SSH tri-state, SSH auth test with debug log |
647
+ | Jobs | `#/jobs` | `/jobs`, `/jobs/{slug}/run`, `POST /jobs`, `PUT /jobs/{slug}`, `DELETE /jobs/{slug}`, `PUT /jobs/{slug}/enabled`, `/platforms` | Full CRUD: create/edit via modal form with per-platform command map editor, enable/disable toggle, delete with confirmation. Auto-refresh (10s), WS-driven instant updates |
648
+ | Config Search | `#/search` | `POST /api/v1/search`, `GET /api/v1/search/capture_types`, `/devices/{id}/snapshots/latest` | Capture-type selector (config, arp, bgp, routes, interfaces), regex search with line numbers, match highlighting, full-output detail modal with ▲▼ match navigation |
649
+ | Live Collection | `#/collection` | `/jobs`, `/jobs/{slug}/run`, `WS /ws` | Job selector, trigger button, real-time progress via WebSocket — per-device log, progress bar, summary |
650
+
651
+ **Key modules:**
652
+
653
+ **`api.js`** (86 lines) — Every `/api/v1` and Oxidized-compat endpoint in one file. Includes job CRUD methods (`createJob`, `updateJob`, `deleteJob`, `setJobEnabled`, `jobHistory`), device edit (`updateDevice`), auth test (`testAuth`), and reference data (`platforms`, `roles`, `credentials`). Views import typed convenience methods and never construct URLs.
654
+
655
+ **`components.js`** (125 lines) — Pure functions returning HTML strings. `badge(status)` maps status strings to colored indicators, `statCard()` renders dashboard metrics, `code()` wraps text in monospace tags, and formatters handle uptime, intervals, relative timestamps, and byte counts. Icon set includes play, edit, trash, toggle on/off for the jobs CRUD UI.
656
+
657
+ **`ws.js`** (136 lines) — WebSocket manager wrapping `/ws` with auto-reconnect (exponential backoff, 2s → 30s cap) and event dispatch. Views subscribe during `init()` and receive an unsubscribe function to call during `destroy()`. The collection view receives four event types from the scheduler: `collection_start`, `device_collected` (now includes `parsed` flag and `template` name), `collection_progress`, `collection_complete`. The jobs view subscribes to `collection_start` and `collection_complete` for instant status updates.
658
+
659
+ **`netlapse.css`** (1,032 lines) — Enterprise light theme. IBM Plex Sans/Mono typography. CSS custom properties for theming — change `--blue` and every button, badge, and link updates. Dark sidebar, light content area. Component classes for cards, tables, badges, stat cards, diff blocks, snapshot timelines, progress bars, form grids, modal dialogs, filter pills, and the search detail modal with match navigation.
660
+
661
+ **Design decisions:**
662
+
663
+ - **Vanilla JS + ES modules** — no webpack, no node, no build tooling. A network engineer opens `views/devices.js` and sees HTML strings and fetch calls.
664
+ - **Dynamic `import()`** — the browser only loads the JS for the view being displayed. Dashboard never loads the collection view's WebSocket code.
665
+ - **`render → init → destroy` lifecycle** — same pattern as a PyQt6 widget (`setupUi → populate → cleanup`). Familiar to anyone who's written desktop apps.
666
+ - **IBM Plex Sans/Mono** — enterprise typography that renders IPs, hostnames, and config blocks alongside prose without visual conflict.
667
+ - **URL param pre-filtering** — clicking a site card on the dashboard navigates to `#/devices?site=cal` and the devices view reads the param on init. Deep-linkable.
668
+
669
+ ## API Detail
670
+ ![Auto-generated OpenAPI 3.1 docs at /docs — two endpoint groups side by side: oxidized-compat (the drop-in /nodes, /node/fetch, /node/next, conf_search surface) and netlapse (the native /api/v1 REST API)](screenshots/netlapse_themed/openapi.png)
671
+ ### Oxidized Compatibility
672
+
673
+ Netlapse implements the Oxidized REST surface LibreNMS uses, served at the root
674
+ path. LibreNMS needs no special configuration beyond pointing `oxidized.url` at
675
+ Netlapse — but Netlapse needs `auth.oxidized_public: true`, because LibreNMS
676
+ stores only a bare URL and can't send credentials. The full Config-tab path
677
+ (metadata, fetch, version list, version view, diffs) is validated end to end
678
+ against a live LibreNMS; the Refresh button works as an opt-in trigger. See
679
+ [README_LibreNMS_Integration.md](README_LibreNMS_Integration.md) for the wiring,
680
+ auth modes, and naming requirements.
681
+
682
+ | Oxidized Endpoint | Method | Status |
683
+ |---|---|---|
684
+ | `/nodes` | GET | ✅ Optional `?group=` filter by site slug |
685
+ | `/node/show/{node}` | GET | ✅ Node metadata (name, ip, model, group, last) |
686
+ | `/node/fetch/{node}` | GET | ✅ Resolves by name or IP |
687
+ | `/node/fetch/{group}/{node}` | GET | ✅ Scoped to site slug |
688
+ | `/node/version` | GET | ✅ Git commits or file timestamps |
689
+ | `/node/version/view` | GET/POST | ✅ View a specific version (plain text) |
690
+ | `/node/version/diffs` | GET | ✅ Unified diff between two versions |
691
+ | `/reload` | GET | ✅ Returns device count |
692
+ | `/node/next/{node}` | GET/PUT | ✅ Enqueues collection — **opt-in**, IP-scoped (Refresh button); LibreNMS sends PUT |
693
+ | `/nodes/conf_search` | POST | ✅ Regex search across all configs (gated; native search is multi-type) |
694
+
695
+ The read routes are exposed unauthenticated under `oxidized_public` (the upstream
696
+ Oxidized trust model — no app-layer auth, trusted network boundary). The trigger
697
+ route (`/node/next`) is a side effect and stays off until you list source IPs in
698
+ `oxidized_public_trigger_ips`; that grant is a strict subset of read access.
699
+ `/nodes/conf_search` and every admin/vault route remain behind the normal gate.
700
+
701
+ ### Native API
702
+
703
+ Swagger UI at `/docs`.
704
+
705
+ | Endpoint | Method | Status |
706
+ |---|---|---|
707
+ | `/api/v1/health` | GET | ✅ Version, uptime, device/job counts |
708
+ | `/api/v1/status/scheduler` | GET | ✅ Running state, active jobs, queue depth, WS clients |
709
+ | `/api/v1/devices` | GET | ✅ Filterable by site, platform, role, status |
710
+ | `/api/v1/devices/{id}` | GET | ✅ Full detail from `v_device_detail` |
711
+ | `/api/v1/devices/{id}` | PATCH | ✅ Update device (14-field whitelist, diff-only) |
712
+ | `/api/v1/devices/{id}/test-auth` | POST | ✅ SSH auth test with debug log capture |
713
+ | `/api/v1/sites` | GET | ✅ Site list with device counts |
714
+ | `/api/v1/platforms` | GET | ✅ Platform list with device counts |
715
+ | `/api/v1/roles` | GET | ✅ Role list with device counts |
716
+ | `/api/v1/credentials` | GET | ✅ Credential list (safe — id, name, username only) |
717
+ | `/api/v1/devices/{id}/snapshots` | GET | ✅ List collection snapshots |
718
+ | `/api/v1/devices/{id}/snapshots/latest` | GET | ✅ Latest raw text + parsed JSON |
719
+ | `/api/v1/devices/{id}/snapshots/{sha}` | GET | ✅ Specific version |
720
+ | `/api/v1/devices/{id}/diff` | GET | ✅ Structured diff |
721
+ | `/api/v1/jobs` | GET | ✅ Job list with last-run summary (v_job_summary) |
722
+ | `/api/v1/jobs` | POST | ✅ Create new job |
723
+ | `/api/v1/jobs/{slug}` | GET | ✅ Single job definition |
724
+ | `/api/v1/jobs/{slug}` | PUT | ✅ Update job (field whitelist) |
725
+ | `/api/v1/jobs/{slug}` | DELETE | ✅ Delete job (history preserved) |
726
+ | `/api/v1/jobs/{slug}/run` | POST | ✅ Trigger immediate run via scheduler |
727
+ | `/api/v1/jobs/{slug}/enabled` | PUT | ✅ Enable/disable job |
728
+ | `/api/v1/jobs/{slug}/history` | GET | ✅ Run history for a job |
729
+ | `/api/v1/history/{id}` | GET | ✅ Single run with per-device results |
730
+ | `/api/v1/collect/{device_id}` | POST | ✅ Trigger single-device collection |
731
+ | `/api/v1/search` | POST | ✅ Multi-capture-type regex search with line numbers |
732
+ | `/api/v1/search/capture_types` | GET | ✅ List all stored capture types dynamically |
733
+
734
+ ## Configuration
735
+
736
+ ```yaml
737
+ # ~/.netlapse/config.yaml
738
+ listen:
739
+ host: 0.0.0.0
740
+ port: 8888
741
+
742
+ storage:
743
+ backend: file # file or git
744
+ path: ~/.netlapse/data
745
+ max_versions: 50 # file backend only
746
+
747
+ dcim_db: ~/.netlapse/netlapse.db
748
+
749
+ scheduler:
750
+ poll_interval: 15 # seconds between due-job checks (default: 15)
751
+ max_workers: 2 # concurrent collection threads (default: 2)
752
+
753
+ emulation:
754
+ enabled: true # redirect SSH to NetEmulate mock devices
755
+ # lookup_path: ~/netemulate/ip_lookup.json # auto-searches defaults if omitted
756
+ # bind_host: 127.0.0.1 # default
757
+
758
+ parser:
759
+ # db_path: ~/.netlapse/tfsm_templates.db # auto-detected if omitted
760
+ # min_score: 15.0 # minimum template match score (0-100)
761
+ ```
762
+
763
+ Config path overridable with `NETLAPSE_CONFIG` env var. Falls back to sensible defaults if no config file exists.
764
+
765
+ Two more files live alongside `config.yaml` in the same directory (default `~/.netlapse/`) and drive collection rather than the daemon itself:
766
+
767
+ - **`captures.yaml`** — what to collect and the per-platform command syntax
768
+ - **`jobs.yaml`** — bindings: which capture runs against which devices
769
+
770
+ Both are optional — absent, the daemon runs on whatever job definitions are already in the DB. See [Data-Driven Collection](#data-driven-collection). Their directory can be overridden with `definitions_dir` in `config.yaml`; otherwise it follows `NETLAPSE_CONFIG`.
771
+
772
+ Environment variables:
773
+ - `NETLAPSE_VAULT_PASSWORD` — master password for headless vault unlock (required for scheduler)
774
+ - `NETLAPSE_CONFIG` — config file path override
775
+ - `NETLAPSE_ADMIN_PASSWORD` — force-set the admin password
776
+
777
+ ## Remaining Work
778
+
779
+ ### Phase 1 — Scheduler ✅
780
+
781
+ Complete. 551 lines. Asyncio poll loop + ThreadPoolExecutor + WebSocket broadcast. Job CRUD API (create, update, delete, enable/disable). Job history with per-device results. Auto-migrating schema (v3 → v4). All API stubs wired. Running in production against 82 devices.
782
+
783
+ ### Phase 2 — Structured Parsing ✅
784
+
785
+ Complete. Parser engine ported from SC2.5 (`tfsm_fire.py` + `engine.py`). 296 TextFSM templates (48 Arista, 143 Cisco IOS, 22 Juniper). Integrated into scheduler — parsing runs inline after SSH collection, before storage. Dual artifacts written to disk: raw `.txt` + parsed `.json`. Output cleaning handles multi-vendor SSH session transcripts (Cisco `#`, Junos `>`, Arista `#`, with banners, pagination responses, and command echo). Device detail view renders parsed data as sortable tables with capture type selection. File backend handles write-through of parsed JSON even when raw text is unchanged (covers parser-added-after-first-collection scenario).
786
+
787
+ ### Phase 3 — Web UI ✅
788
+
789
+ Complete. ~3,300 lines, 13 files, zero new dependencies. Six views with hash routing, dynamic module loading, and WebSocket integration. Jobs view has full CRUD (create, edit, enable/disable, delete via modal forms with two-column grid layout, capture type datalist, interval picker, collapsible device filters, auto-slug generation). Device detail view has capture type pill selector across Raw Output, Parsed Data, and Semantic Diff tabs — parsed data renders as sortable tables with status color-coding. Config search supports all capture types with regex, line numbers, and full-output detail modal.
790
+
791
+ ### Phase 4 — Multi-Vendor & Device Management (partial) ✅
792
+
793
+ Per-platform command resolution: a job's `commands` column holds the default command list and `command_map` holds per-platform overrides (`platform_slug` → command array). These columns are no longer authored by hand — for file-managed jobs they're **projected from `captures.yaml`** at sync time (the capture's `default` becomes `commands`, every other platform key becomes a `command_map` entry), so the override surface is edited in one vendor-neutral place rather than per job. API-created jobs still write the same two columns directly, so both kinds resolve identically.
794
+
795
+ Resolution itself is a single shared function — `registry.resolver.resolve_commands(default_commands, command_map, platform_slug)` — that **both the scheduler and the CLI collector call**. That sharing is the point: before it existed, the scheduler honored `command_map` inline while the CLI collector sent the default commands to every platform, so `netlapse collect --job` and the daemon disagreed on Junos boxes. With one resolver, the two paths cannot diverge. Schema is now v10. `jobs.source` has three states — `file` (catalog-owned, read-only, tombstoned when removed), `seed` (a shipped default no file owns yet: editable, never tombstoned, auto-adopted to `file` when a matching binding is synced), and `NULL`/`api` (hand-made, never touched by sync). The defaults are seeded `seed` so they collect and stay editable with no catalog on disk, yet a catalog edit takes effect the moment its slug is synced. Auto-migrates from v3 through v10 (v10 repairs DBs left at an interim `file` seed value).
796
+
797
+ Device edit: full CRUD modal on the device detail view. Three sections — Identity (name, status, IPs, platform, site, role), Collection (credential override, SSH port, legacy SSH tri-state, collection enabled), Metadata (serial, asset tag, description, comments). Diff-only saves — only changed fields are sent in the PATCH payload. Reference data (sites, platforms, roles, credentials) lazy-loaded once on first edit.
798
+
799
+ Per-device SSH controls: `credential_id` overrides the shared vault default per device. `legacy_ssh` column on `dcim_device` (nullable — NULL inherits platform default, 0 forces off, 1 forces on). The SSH client now passes `disabled_algorithms={'pubkeys': ['rsa-sha2-512', 'rsa-sha2-256']}` when legacy mode is active, forcing `ssh-rsa` signatures for pre-2014 OpenSSH servers that don't support RFC 8332. (Introduced in schema v6; current schema is v10.)
800
+
801
+ SSH auth test with debug: `POST /devices/{id}/test-auth` connects, detects prompt, captures transport negotiation details (SSH banner, KEX algorithm, cipher, auth method), and disconnects. A temporary log handler captures DEBUG-level output from Paramiko and the SSH client during the test, returning the full negotiation trace. Updates `credential_tested_at` and `credential_test_result` on the device. UI shows inline results with a collapsible debug log panel — the exact output that diagnosed the rsa-sha2-512 signature mismatch on OpenSSH 6.2 Juniper border routers.
802
+
803
+ ### Phase 5 — Remaining
804
+
805
+ NetBox sync (optional, bidirectional), syslog-triggered collection, external device sources (LibreNMS/NetBox API), backfill CLI (re-parse existing raw text through the parser), per-template timeout guard in tfsm-fire for pathological regex cases, admin UI for template management, tfsm-fire template database auto-download on first run.
806
+
807
+ Shipped since this list was first written: application authentication (session-cookie login, scrypt hashing, LDAP/AD with directory-authoritative roles), the data-driven definition registry, and interval-change rescheduling (a definition sync re-derives `next_run` when a capture's interval changes, so it takes effect that cycle rather than the next).
808
+
809
+ ## Design Decisions
810
+
811
+ **Why the network is the source of truth:** Resistance to NetBox adoption at the operator level drove an architectural pivot. VelocityCMDB required a populated NetBox; Netlapse doesn't. Each tool in the suite carries enough DCIM to operate autonomously. If NetBox exists, sync to it. If it doesn't, the tool still works. The SC2 topology map is the seed source — `pip install`, point at the network, start collecting.
812
+
813
+ **Why definitions are files-authoritative but DB-synced:** A YAML catalog is what an operator should edit — diffable, reviewable, version-controllable, no code change to add a vendor. But the running daemon needs state that doesn't belong in a file: the live enable/disable toggle flipped in the UI, the schedule (`last_run`/`next_run`), and job history. So definitions are authoritative in the files and projected into the jobs table on startup, while that runtime state stays authoritative in the DB and is never clobbered by a sync. The DB therefore doubles as the last-good cache: if the catalog fails validation on a restart, the sync is skipped and the daemon runs on the definitions already in the DB. A typo degrades to "ran with last good definitions," never to "stopped collecting."
814
+
815
+ **Why platforms stayed in the DB while captures and jobs moved to files:** Devices reference their platform by `platform_slug`, and `dcim_platform` already carries the live SSH behavior each platform needs — prompt regex, paging-disable command, enable command, legacy-SSH flag. Putting platforms in a file too would create a second source of platform truth that could drift from the one devices actually resolve against. Instead, a capture's per-platform command keys are validated against the real `dcim_platform` list at load time, so an override key that no device could ever match is caught against the source that decides matching. One platform table, no drift. (Making the file authoritative for SSH behavior too is a deliberate later step — it means deciding what wins when file and DCIM disagree.)
816
+
817
+ **Why no netmiko:** The SC2 SSH client is 700 lines of battle-tested Paramiko logic — ANSI filtering, prompt detection, legacy algorithm support, invoke-shell for devices that reject exec channels. It auto-registers kex and host key handlers that Paramiko 3.x strips from its lookup dicts, handling mixed-fleet algorithm negotiation (modern curve25519 through legacy DH group1) without retry chains. Netmiko would be a dependency that does less than what's already built.
818
+
819
+ **Why SQLite, not PostgreSQL:** Single-file deployment. No database server. The DCIM handles 500+ devices trivially. WAL mode handles concurrent API reads while the scheduler writes. Worker threads create their own connections — SQLite connections can't cross thread boundaries, but concurrent connections with WAL mode are safe.
820
+
821
+ **Why two storage backends:** Not everyone has or wants git. The file backend lets someone start collecting in 60 seconds. When they want versioning, they switch one config line.
822
+
823
+ **Why dual artifacts (raw + parsed):** The raw text is what you `grep` at 2 AM. The parsed JSON is what makes Netlapse different — structured diffs, semantic change detection, operational state awareness.
824
+
825
+ **Why a separate vault database:** The credential vault lives at `~/.netlapse/vault.db`, separate from the DCIM at `~/.netlapse/netlapse.db`. Different security boundary — the vault is encrypted, the DCIM is not. The `credential_id` on `dcim_device` is a logical reference resolved at runtime through the vault bridge.
826
+
827
+ **Why commit trailers instead of a metadata database:** Git trailers survive `git clone`, `git bundle`, repo migrations, and backup/restore. A side-car database can get out of sync.
828
+
829
+ **Why `site_slug` = Oxidized group = git directory:** Three systems that need to agree on a namespace. Making them the same string eliminates mapping tables.
830
+
831
+ **Why thread-local DB connections in the scheduler:** Python's `sqlite3` connections can't be shared across threads by default (`check_same_thread=True`). The scheduler's worker threads create their own `NetlapseDB(db_path)` instances and close them after each job. The poll loop runs in the main asyncio thread and uses the shared connection. WAL mode lets readers proceed while a write is in progress, so concurrent reads don't block.
832
+
833
+ ## Design Philosophy
834
+
835
+ The stack is deliberately inheritable: FastAPI, SQLite, vanilla JS, Paramiko — mainstream frameworks, no exotic dependencies. Vanilla JS is intentional (the next person maintaining this is a network engineer, not a frontend developer). Every project in the suite has architecture docs and is pip-installable. GPL-licensed so it stays open.
836
+
837
+ ## Portable Components
838
+
839
+ Netlapse reuses battle-tested modules from the author's network automation stack:
840
+
841
+ | Component | Origin | Status | Purpose |
842
+ |---|---|---|---|
843
+ | SSH Client | Secure Cartography v2 | ✅ Ported | Paramiko wrapper with auto-registered algorithm handlers, key+password auth, ANSI filtering, prompt detection |
844
+ | Emulation Shim | Secure Cartography v2 | ✅ Ported | NetEmulate mock device redirection for testing |
845
+ | SSH Executor | VelocityCollector | ✅ Adapted | DCIM → SSH → Snapshot pipeline with 12-category error handling |
846
+ | DCIM Schema | VelocityCollector | ✅ Ported | NetBox-aligned SQLite (sites, platforms, roles, devices, jobs, history) |
847
+ | Credential Vault | Secure Cartography v2 | ✅ Ported | Fernet-encrypted SQLite, headless unlock, DCIM bridge |
848
+ | tfsm-fire | Secure Cartography v2.5 | ✅ Ported | TextFSM auto-template selection — output selects template |
849
+ | Parse Engine | New for Netlapse | ✅ Built | Output cleaning, filter cascade, vendor fallback, Snapshot enrichment |
850
+ | Map Importer | New for Netlapse | ✅ Built | SC2 topology maps → DCIM device inventory, hostname.site preservation |
851
+ | Collection Pipeline | New for Netlapse | ✅ Built | End-to-end: DCIM → vault → executor → parser → storage |
852
+ | Scheduler | New for Netlapse | ✅ Built | asyncio + ThreadPoolExecutor, WS broadcast, inline parsing, job history |
853
+ | Web UI | New for Netlapse | ✅ Built | SPA: dashboard, inventory, parsed data tables, job CRUD, live collection |
854
+
855
+ ## Dependencies
856
+
857
+ ```
858
+ fastapi>=0.110 # Web framework
859
+ uvicorn[standard]>=0.27 # ASGI server
860
+ python-multipart>=0.0.9 # Form parsing (Oxidized compat endpoints)
861
+ paramiko>=3.4 # SSH (SC2 client)
862
+ gitpython>=3.1 # Git storage backend
863
+ deepdiff>=7.0 # Structured diff engine
864
+ pyyaml>=6.0 # Configuration
865
+ cryptography>=42.0 # Vault encryption
866
+ textfsm>=1.1 # Template parsing (tfsm-fire structured output)
867
+ ```
868
+
869
+
870
+ ### Python Compatibility
871
+
872
+ Tested on Python 3.12 and 3.14. The SSH client's `LegacySSHSupport` handles Paramiko algorithm registration differences across Python versions automatically — no version-specific configuration needed.
873
+
874
+ ## License
875
+
876
+ GPLv3
877
+
878
+ ## Author
879
+
880
+ Scott Peterman — [Full Stack Net Ops Developer](https://scottpeterman.github.io)