@titan-design/brain 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
1
+ // src/services/web-extract.ts
2
+ import { Readability } from "@mozilla/readability";
3
+ import TurndownService from "turndown";
4
+ import { JSDOM } from "jsdom";
5
+ import { createHash } from "crypto";
6
/**
 * Query-string parameter names that are removed by `normalizeUrl`.
 * These are campaign/attribution style parameters; they are dropped so the
 * same page reached through different links normalizes to the same URL.
 */
var TRACKING_PARAMS = /* @__PURE__ */ new Set([
  "utm_source",
  "utm_medium",
  "utm_campaign",
  "utm_term",
  "utm_content",
  "fbclid",
  "gclid",
  "ref",
  "source",
  "mc_cid",
  "mc_eid"
]);

/**
 * Produce a canonical string form of a URL.
 *
 * Steps: lowercase the hostname, delete every parameter listed in
 * `TRACKING_PARAMS`, sort the remaining query parameters, and strip one
 * trailing slash from the path (the root path "/" is kept).
 *
 * @param {string} rawUrl - Absolute URL to normalize.
 * @returns {string} The normalized URL.
 * @throws {TypeError} If `rawUrl` cannot be parsed by the `URL` constructor.
 */
function normalizeUrl(rawUrl) {
  const url = new URL(rawUrl);
  url.hostname = url.hostname.toLowerCase();
  for (const param of TRACKING_PARAMS) {
    url.searchParams.delete(param);
  }
  url.searchParams.sort();
  // Strip the slash on the path component itself rather than on the
  // serialized string: the serialized form may end with a query or fragment,
  // in which case the path's slash would be missed, or a fragment that ends
  // in "/" would be truncated by mistake.
  if (url.pathname.length > 1 && url.pathname.endsWith("/")) {
    url.pathname = url.pathname.slice(0, -1);
  }
  return url.toString();
}
32
/**
 * Read page-level metadata from `<meta>` tags (and `<title>`) of an HTML
 * document.
 *
 * For each field a list of CSS selectors is tried in order; the first element
 * that has a non-blank `content` attribute wins. A matching tag whose
 * `content` is missing or blank is skipped so that the later fallback
 * selectors still get a chance.
 *
 * @param {string} html - Raw HTML source.
 * @returns {{title: string|null, author: string|null, description: string|null, siteName: string|null, publishedDate: string|null}}
 *   Extracted values; a field is `null` when nothing usable was found.
 */
function extractMetadata(html) {
  const dom = new JSDOM(html);
  try {
    const doc = dom.window.document;
    const getMeta = (selectors) => {
      for (const sel of selectors) {
        const content = doc.querySelector(sel)?.getAttribute("content");
        // Keep looking when the tag exists but carries no usable value.
        if (content && content.trim() !== "") return content;
      }
      return null;
    };
    return {
      // `document.title` is always a string ("" when there is no <title>),
      // so `||` is needed to turn the empty case into null.
      title: getMeta(['meta[property="og:title"]', 'meta[name="title"]']) ?? (doc.title || null),
      author: getMeta(['meta[name="author"]', 'meta[property="article:author"]']),
      description: getMeta(['meta[property="og:description"]', 'meta[name="description"]']),
      siteName: getMeta(['meta[property="og:site_name"]']),
      publishedDate: getMeta(['meta[property="article:published_time"]', 'meta[name="date"]'])
    };
  } finally {
    // Release the jsdom window once the values have been copied out.
    dom.window.close();
  }
}
50
/**
 * Turn an HTML page into markdown plus metadata.
 *
 * The page is parsed with jsdom (using `url` as the document URL), the main
 * article is isolated with Readability and converted to markdown with
 * Turndown. Metadata comes from `extractMetadata(html)`; title, author and
 * site name are replaced by Readability's values whenever those are truthy.
 *
 * @param {string} html - Raw HTML source.
 * @param {string} url - URL the HTML was loaded from.
 * @returns {{markdown: string, metadata: object, normalizedUrl: string, contentHash: string}}
 *   `markdown` is "" when Readability yields no content; `contentHash` is the
 *   hex SHA-256 digest of `markdown`.
 */
function extractFromHtml(html, url) {
  const { window } = new JSDOM(html, { url });
  const article = new Readability(window.document).parse();

  const converter = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced"
  });
  let markdown = "";
  if (article?.content) {
    markdown = converter.turndown(article.content);
  }

  // Metadata is read from the raw HTML string, not from the document that
  // was handed to Readability above.
  const metadata = extractMetadata(html);
  const articleOverrides = {
    title: article?.title,
    author: article?.byline,
    siteName: article?.siteName
  };
  for (const [field, value] of Object.entries(articleOverrides)) {
    if (value) {
      metadata[field] = value;
    }
  }

  const hasher = createHash("sha256");
  hasher.update(markdown);
  const contentHash = hasher.digest("hex");

  const normalizedUrl = normalizeUrl(url);
  return { markdown, metadata, normalizedUrl, contentHash };
}
71
/**
 * Defaults for `fetchAndExtract`.
 * - `timeout`: milliseconds before the request is aborted.
 * - `maxSize`: maximum accepted response body size, in bytes.
 * - `userAgent`: value sent in the `User-Agent` request header.
 */
var DEFAULT_OPTIONS = {
  timeout: 1e4,
  maxSize: 5 * 1024 * 1024,
  userAgent: "brain/1.0"
};

/**
 * Download a web page and run `extractFromHtml` on it.
 *
 * The body is read as a stream and the byte count is checked after every
 * chunk, so an oversized response is rejected without being buffered in full,
 * even when the server sends no `Content-Length` header. The body is decoded
 * as UTF-8, the same as `Response.text()`.
 *
 * @param {string} url - Absolute http(s) URL to fetch.
 * @param {{timeout?: number, maxSize?: number, userAgent?: string}} [options]
 *   Overrides for `DEFAULT_OPTIONS`; properties set to `undefined` are ignored.
 * @returns {Promise<object>} Whatever `extractFromHtml(html, url)` returns.
 * @throws {TypeError} If `url` cannot be parsed.
 * @throws {Error} On a non-http(s) protocol, a non-2xx status, or a body
 *   larger than `maxSize`. The request is aborted after `timeout` ms, in
 *   which case the rejection comes from `fetch` / the stream reader.
 */
async function fetchAndExtract(url, options) {
  // Copy only defined overrides: spreading `{ timeout: undefined }` over the
  // defaults would wipe the default and make the abort timer fire at once.
  const opts = { ...DEFAULT_OPTIONS };
  for (const [key, value] of Object.entries(options ?? {})) {
    if (value !== undefined) {
      opts[key] = value;
    }
  }

  const parsed = new URL(url);
  if (!["http:", "https:"].includes(parsed.protocol)) {
    throw new Error(`Unsupported protocol: ${parsed.protocol} (only http/https)`);
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), opts.timeout);
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: { "User-Agent": opts.userAgent },
      redirect: "follow"
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    // Cheap early exit when the server declares the size up front.
    const contentLength = response.headers.get("content-length");
    if (contentLength && Number(contentLength) > opts.maxSize) {
      throw new Error(`Content too large: ${contentLength} bytes (max ${opts.maxSize})`);
    }

    let html = "";
    if (response.body) {
      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let received = 0;
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        // Count raw bytes, not decoded characters, so the limit matches the
        // unit used in the option name and in the error message.
        received += value.byteLength;
        if (received > opts.maxSize) {
          await reader.cancel();
          throw new Error(`Content too large: at least ${received} bytes (max ${opts.maxSize})`);
        }
        html += decoder.decode(value, { stream: true });
      }
      // Flush any bytes of a multi-byte sequence still held by the decoder.
      html += decoder.decode();
    }

    return extractFromHtml(html, url);
  } finally {
    clearTimeout(timeoutId);
  }
}
106
+ export {
107
+ extractFromHtml,
108
+ extractMetadata,
109
+ fetchAndExtract,
110
+ normalizeUrl
111
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@titan-design/brain",
3
- "version": "0.2.2",
3
+ "version": "0.3.0",
4
4
  "type": "module",
5
5
  "description": "Developer second brain with hybrid RAG search",
6
6
  "license": "MIT",
@@ -23,31 +23,41 @@
23
23
  "test:watch": "vitest",
24
24
  "typecheck": "tsc --noEmit",
25
25
  "lint": "eslint src __tests__ --no-error-on-unmatched-pattern",
26
+ "format": "prettier --write \"src/**/*.ts\" \"__tests__/**/*.ts\"",
27
+ "format:check": "prettier --check \"src/**/*.ts\" \"__tests__/**/*.ts\"",
28
+ "test:coverage": "vitest run --coverage",
26
29
  "postinstall": "node scripts/postinstall.js",
27
30
  "preuninstall": "node scripts/preuninstall.js"
28
31
  },
29
32
  "dependencies": {
30
- "commander": "^13.1.0",
31
33
  "@commander-js/extra-typings": "^13.1.0",
34
+ "@mozilla/readability": "^0.6.0",
32
35
  "better-sqlite3": "^11.8.1",
33
- "sqlite-vec": "^0.1.6",
34
- "ollama": "^0.5.14",
36
+ "commander": "^13.1.0",
37
+ "env-paths": "^3.0.0",
35
38
  "gray-matter": "^4.0.3",
36
- "env-paths": "^3.0.0"
39
+ "jsdom": "^28.1.0",
40
+ "ollama": "^0.5.14",
41
+ "sqlite-vec": "^0.1.6",
42
+ "turndown": "^7.2.2"
37
43
  },
38
44
  "optionalDependencies": {
39
45
  "@huggingface/transformers": "^3.3.3"
40
46
  },
41
47
  "devDependencies": {
42
- "typescript": "^5.7.3",
43
- "vitest": "^3.0.5",
44
- "tsup": "^8.3.6",
48
+ "@eslint/js": "^9.20.0",
45
49
  "@types/better-sqlite3": "^7.6.13",
50
+ "@types/jsdom": "^27.0.0",
46
51
  "@types/node": "^22.13.4",
47
- "tsx": "^4.19.2",
52
+ "@types/turndown": "^5.0.6",
53
+ "@vitest/coverage-v8": "^3.2.4",
48
54
  "eslint": "^9.20.0",
49
- "@eslint/js": "^9.20.0",
50
- "typescript-eslint": "^8.24.1"
55
+ "prettier": "^3.8.1",
56
+ "tsup": "^8.3.6",
57
+ "tsx": "^4.19.2",
58
+ "typescript": "^5.7.3",
59
+ "typescript-eslint": "^8.24.1",
60
+ "vitest": "^3.0.5"
51
61
  },
52
62
  "publishConfig": {
53
63
  "access": "public",
package/skill/SKILL.md CHANGED
@@ -5,7 +5,7 @@ description: Search and manage your second brain knowledge base. Use when the us
5
5
 
6
6
  # Brain -- Knowledge Management
7
7
 
8
- A CLI for managing a developer second brain with hybrid BM25 + vector search over markdown notes.
8
+ A CLI for managing a developer second brain with hybrid BM25 + vector search, LLM-powered memory extraction, and temporal intelligence over markdown notes.
9
9
 
10
10
  ## When to Use
11
11
 
@@ -13,6 +13,8 @@ A CLI for managing a developer second brain with hybrid BM25 + vector search ove
13
13
  - User wants to save something to their knowledge base
14
14
  - User asks about stale or outdated notes
15
15
  - User wants to check their knowledge base status
16
+ - User wants to capture a quick thought or link
17
+ - User asks about extracted memories or facts
16
18
 
17
19
  ## Commands
18
20
 
@@ -21,14 +23,26 @@ Use `--json` flag on all commands when processing output programmatically.
21
23
  | Command | Purpose | Example |
22
24
  |---------|---------|---------|
23
25
  | `brain search "<query>" --json` | Hybrid search (BM25 + vector) | `brain search "authentication patterns" --json --limit 5` |
24
- | `brain search "<query>" --json --expand` | Search with graph-connected notes | `brain search "auth" --json --expand` |
26
+ | `brain search "<query>" --memories --json` | Search notes + extracted memories | `brain search "auth" --memories --json` |
27
+ | `brain search "<query>" --rerank --json` | Search with cross-encoder reranking | `brain search "auth" --rerank --json` |
28
+ | `brain search "<query>" --expand --json` | Search with graph-connected notes | `brain search "auth" --json --expand` |
25
29
  | `brain add <file>` | Add a note from file | `brain add ~/draft.md --type research --tier slow` |
26
30
  | `brain add --title "X" --type note` | Add from stdin | `echo "content" \| brain add --title "My Note" --type note` |
31
+ | `brain quick "thought"` | Zero-friction capture to inbox | `brain quick "look into WebSockets vs SSE"` |
32
+ | `brain inbox --json` | View inbox items | `brain inbox --status pending --json` |
33
+ | `brain extract --all` | Extract memories from all notes | Requires Ollama running locally |
34
+ | `brain extract --note <id>` | Extract memories from one note | `brain extract --note my-note-id` |
35
+ | `brain memories list --json` | List active memories | `brain memories list --container default --json` |
36
+ | `brain memories history <id>` | Show memory version chain | `brain memories history mem-abc123` |
37
+ | `brain memories stats` | Memory count + expiry sweep | Shows active count, runs auto-forget |
38
+ | `brain context <id> --json` | Note context (relations + memories) | `brain context my-note --json` |
39
+ | `brain profile --format json` | Agent context profile | `brain profile --container default --format json` |
27
40
  | `brain status --json` | Database stats | Shows note count, embeddings, staleness |
28
41
  | `brain stale --json` | Notes needing review | `brain stale --tier slow --json` |
29
42
  | `brain index` | Re-index all notes | Only run when user asks -- this is slow |
30
- | `brain template <type>` | Output frontmatter template | `brain template research` |
31
43
  | `brain graph <note-id> --json` | Show note relations | `brain graph my-note --json` |
44
+ | `brain doctor --json` | System health checks | Shows DB, embedder, LLM, inbox, stale status |
45
+ | `brain doctor --fix` | Auto-repair issues | Pulls missing models, resets failed inbox |
32
46
  | `brain config get` | Show config | `brain config get` |
33
47
 
34
48
  ## Search Filters