x-summary 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +6 -0
- package/INSTRUCTIONS.example.md +113 -0
- package/LICENSE +619 -0
- package/README.md +160 -0
- package/config.example.json +16 -0
- package/dist/bundle/scrape.mjs +50 -0
- package/dist/bundle/scrape.mjs.map +7 -0
- package/dist/bundle/summarize.mjs +22 -0
- package/dist/bundle/summarize.mjs.map +7 -0
- package/package.json +76 -0
- package/schemas/config.schema.json +95 -0
- package/schemas/state.schema.json +123 -0
package/package.json
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "x-summary",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Summarize X.com Following and For You feeds using Playwright and an LLM",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "git+https://github.com/barbieri/x-summary.git"
|
|
8
|
+
},
|
|
9
|
+
"type": "module",
|
|
10
|
+
"private": false,
|
|
11
|
+
"license": "GPL-3.0-or-later",
|
|
12
|
+
"engines": {
|
|
13
|
+
"node": ">=24.15.0"
|
|
14
|
+
},
|
|
15
|
+
"packageManager": "pnpm@11.2.2",
|
|
16
|
+
"files": [
|
|
17
|
+
"dist/bundle",
|
|
18
|
+
"schemas",
|
|
19
|
+
"config.example.json",
|
|
20
|
+
"INSTRUCTIONS.example.md",
|
|
21
|
+
".env.example",
|
|
22
|
+
"LICENSE",
|
|
23
|
+
"README.md"
|
|
24
|
+
],
|
|
25
|
+
"bin": {
|
|
26
|
+
"x-summary-scrape": "dist/bundle/scrape.mjs",
|
|
27
|
+
"x-summary-summarize": "dist/bundle/summarize.mjs"
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsc && node scripts/build-cli.mjs",
|
|
31
|
+
"build:cli": "node scripts/build-cli.mjs",
|
|
32
|
+
"check": "biome check --error-on-warnings .",
|
|
33
|
+
"check:fix": "biome check --write --error-on-warnings .",
|
|
34
|
+
"format": "biome format --write .",
|
|
35
|
+
"lint": "biome lint --error-on-warnings .",
|
|
36
|
+
"qa": "run-p check build test typecheck",
|
|
37
|
+
"start": "run-s --silent scrape summarize --",
|
|
38
|
+
"start:bundle": "run-s --silent scrape:bundle summarize:bundle --",
|
|
39
|
+
"scrape": "node --import tsx src/scrape.ts",
|
|
40
|
+
"summarize": "node --import tsx src/summarize.ts",
|
|
41
|
+
"scrape:bundle": "node dist/bundle/scrape.mjs",
|
|
42
|
+
"summarize:bundle": "node dist/bundle/summarize.mjs",
|
|
43
|
+
"inspect:x": "node --import tsx scripts/inspect-x-dom.ts",
|
|
44
|
+
"test": "vitest run",
|
|
45
|
+
"test:watch": "vitest",
|
|
46
|
+
"typecheck": "tsc --noEmit -p tsconfig.json && tsc --noEmit -p tsconfig.test.json",
|
|
47
|
+
"prepare": "node scripts/prepare.mjs",
|
|
48
|
+
"prepublishOnly": "pnpm run build"
|
|
49
|
+
},
|
|
50
|
+
"dependencies": {
|
|
51
|
+
"@ai-sdk/anthropic": "^3.0.76",
|
|
52
|
+
"@ai-sdk/google": "^3.0.75",
|
|
53
|
+
"@ai-sdk/openai": "^3.0.64",
|
|
54
|
+
"@ai-sdk/xai": "^3.0.91",
|
|
55
|
+
"@openrouter/ai-sdk-provider": "^2.9.0",
|
|
56
|
+
"ai": "^6.0.185",
|
|
57
|
+
"ai-sdk-provider-opencode-sdk": "^3.0.2",
|
|
58
|
+
"ajv": "8.20.0",
|
|
59
|
+
"ajv-formats": "^3.0.1",
|
|
60
|
+
"dotenv": "^17.4.2",
|
|
61
|
+
"pino": "^10.3.1",
|
|
62
|
+
"playwright": "^1.60.0"
|
|
63
|
+
},
|
|
64
|
+
"devDependencies": {
|
|
65
|
+
"@biomejs/biome": "^2.4.15",
|
|
66
|
+
"@tsconfig/strictest": "^2.0.8",
|
|
67
|
+
"@types/node": "^25.9.1",
|
|
68
|
+
"esbuild": "^0.28.0",
|
|
69
|
+
"husky": "^9.1.7",
|
|
70
|
+
"npm-run-all": "^4.1.5",
|
|
71
|
+
"pino-pretty": "^13.1.3",
|
|
72
|
+
"tsx": "^4.20.6",
|
|
73
|
+
"typescript": "6.0.3",
|
|
74
|
+
"vitest": "^4.1.7"
|
|
75
|
+
}
|
|
76
|
+
}
|
|
package/schemas/config.schema.json
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://x-summary.local/schemas/config.schema.json",
|
|
4
|
+
"title": "X Summary Config",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": ["ownerHandle", "timeWindowMinutes", "instructionsPath", "monitored", "llm"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"ownerHandle": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"minLength": 1,
|
|
12
|
+
"description": "Handle of the logged-in owner; used to verify the active browser session."
|
|
13
|
+
},
|
|
14
|
+
"timeWindowMinutes": {
|
|
15
|
+
"type": "integer",
|
|
16
|
+
"minimum": 1,
|
|
17
|
+
"description": "Collect posts from the last N minutes."
|
|
18
|
+
},
|
|
19
|
+
"statePath": {
|
|
20
|
+
"type": "string",
|
|
21
|
+
"default": "./tmp/state.json",
|
|
22
|
+
"description": "Path to the persisted scrape state JSON file."
|
|
23
|
+
},
|
|
24
|
+
"instructionsPath": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"minLength": 1,
|
|
27
|
+
"description": "Path to the instructions Markdown file (e.g. INSTRUCTIONS.md) whose contents are prepended to the LLM prompt."
|
|
28
|
+
},
|
|
29
|
+
"monitored": {
|
|
30
|
+
"type": "array",
|
|
31
|
+
"items": {
|
|
32
|
+
"type": "string",
|
|
33
|
+
"minLength": 1
|
|
34
|
+
},
|
|
35
|
+
"uniqueItems": true,
|
|
36
|
+
"description": "Handles of accounts the owner does not follow whose posts are still included in summarization."
|
|
37
|
+
},
|
|
38
|
+
"headless": {
|
|
39
|
+
"type": "boolean",
|
|
40
|
+
"default": true,
|
|
41
|
+
"description": "When true (default), run the browser in headless mode. When false, run the browser in visible mode."
|
|
42
|
+
},
|
|
43
|
+
"abortOnIncorrectOwnerHandle": {
|
|
44
|
+
"type": "boolean",
|
|
45
|
+
"default": false,
|
|
46
|
+
"description": "When true, exit with an error if login is required or the active session does not match ownerHandle. When false (default), keep the browser open and wait for you to log in or switch accounts."
|
|
47
|
+
},
|
|
48
|
+
"browserProfilePath": {
|
|
49
|
+
"type": "string",
|
|
50
|
+
"default": "./tmp/browser-profile",
|
|
51
|
+
"description": "Chrome user-data directory reused across runs (cookies, localStorage, IndexedDB). Not used for Vitest."
|
|
52
|
+
},
|
|
53
|
+
"browserCdpEndpoint": {
|
|
54
|
+
"type": "string",
|
|
55
|
+
"description": "Optional Chrome DevTools Protocol URL (e.g. http://127.0.0.1:9222). When set, attaches to a Chrome instance you start manually, which is the best way to avoid FedCM/Google login failures."
|
|
56
|
+
},
|
|
57
|
+
"timezone": {
|
|
58
|
+
"type": "string",
|
|
59
|
+
"minLength": 1,
|
|
60
|
+
"description": "Optional IANA timezone for the summarization prompt (e.g. America/Sao_Paulo). Used when humanizing timestamps and the covered time window."
|
|
61
|
+
},
|
|
62
|
+
"parallelTabs": {
|
|
63
|
+
"type": "integer",
|
|
64
|
+
"minimum": 1,
|
|
65
|
+
"default": 4,
|
|
66
|
+
"description": "Number of browser tabs used in parallel when scraping post detail pages and referenced posts."
|
|
67
|
+
},
|
|
68
|
+
"summarizeNoPosts": {
|
|
69
|
+
"type": "boolean",
|
|
70
|
+
"default": false,
|
|
71
|
+
"description": "When false (default), summarization skips the LLM and returns an empty string if the state has no posts. When true, call the LLM anyway so INSTRUCTIONS.md can shape a custom no-posts message (e.g. translation or tone)."
|
|
72
|
+
},
|
|
73
|
+
"llm": {
|
|
74
|
+
"type": "object",
|
|
75
|
+
"additionalProperties": false,
|
|
76
|
+
"required": ["provider", "model"],
|
|
77
|
+
"properties": {
|
|
78
|
+
"provider": {
|
|
79
|
+
"type": "string",
|
|
80
|
+
"enum": ["openai", "anthropic", "google", "xai", "openrouter", "opencode"]
|
|
81
|
+
},
|
|
82
|
+
"model": {
|
|
83
|
+
"type": "string",
|
|
84
|
+
"minLength": 1
|
|
85
|
+
},
|
|
86
|
+
"temperature": {
|
|
87
|
+
"type": "number",
|
|
88
|
+
"minimum": 0,
|
|
89
|
+
"maximum": 1,
|
|
90
|
+
"description": "Sampling temperature passed to generateText. Use lower values for more deterministic and focused summaries."
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
package/schemas/state.schema.json
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://x-summary.local/schemas/state.schema.json",
|
|
4
|
+
"title": "X Summary State",
|
|
5
|
+
"description": "Scraped timeline snapshot. Feed lists hold ordered post hrefs; full post data (stats, author, timestamp, body, references, thread) lives in posts keyed by canonical href.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": [
|
|
9
|
+
"timestamp",
|
|
10
|
+
"cutoffTimestamp",
|
|
11
|
+
"posts",
|
|
12
|
+
"following",
|
|
13
|
+
"forYouSuggestions",
|
|
14
|
+
"monitored"
|
|
15
|
+
],
|
|
16
|
+
"properties": {
|
|
17
|
+
"timestamp": {
|
|
18
|
+
"description": "ISO 8601 timestamp of when this state snapshot was generated.",
|
|
19
|
+
"type": "string",
|
|
20
|
+
"format": "date-time"
|
|
21
|
+
},
|
|
22
|
+
"cutoffTimestamp": {
|
|
23
|
+
"description": "Absolute ISO 8601 instant marking the start of the collection window (not a duration or minute offset). On the first run this is the scrape time minus timeWindowMinutes; on an incremental run it is the previous state's timestamp.",
|
|
24
|
+
"type": "string",
|
|
25
|
+
"format": "date-time"
|
|
26
|
+
},
|
|
27
|
+
"posts": {
|
|
28
|
+
"description": "All scraped posts keyed by canonical href. following, forYouSuggestions, monitored, and post references/thread point into this map.",
|
|
29
|
+
"type": "object",
|
|
30
|
+
"propertyNames": { "$ref": "#/$defs/postHref" },
|
|
31
|
+
"additionalProperties": { "$ref": "#/$defs/post" }
|
|
32
|
+
},
|
|
33
|
+
"following": {
|
|
34
|
+
"description": "Ordered hrefs into posts for Following > Recent.",
|
|
35
|
+
"type": "array",
|
|
36
|
+
"items": { "$ref": "#/$defs/postHref" }
|
|
37
|
+
},
|
|
38
|
+
"forYouSuggestions": {
|
|
39
|
+
"description": "Ordered hrefs into posts for For You suggestions.",
|
|
40
|
+
"type": "array",
|
|
41
|
+
"items": { "$ref": "#/$defs/postHref" }
|
|
42
|
+
},
|
|
43
|
+
"monitored": {
|
|
44
|
+
"description": "Ordered hrefs into posts per monitored handle.",
|
|
45
|
+
"type": "object",
|
|
46
|
+
"propertyNames": {
|
|
47
|
+
"type": "string",
|
|
48
|
+
"minLength": 1
|
|
49
|
+
},
|
|
50
|
+
"additionalProperties": {
|
|
51
|
+
"type": "array",
|
|
52
|
+
"items": { "$ref": "#/$defs/postHref" }
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
},
|
|
56
|
+
"$defs": {
|
|
57
|
+
"postHref": {
|
|
58
|
+
"description": "Canonical post URL; must exist as a key in posts.",
|
|
59
|
+
"type": "string",
|
|
60
|
+
"format": "uri"
|
|
61
|
+
},
|
|
62
|
+
"stats": {
|
|
63
|
+
"description": "Engagement counts at scrape time.",
|
|
64
|
+
"type": "object",
|
|
65
|
+
"additionalProperties": false,
|
|
66
|
+
"required": ["comments", "reposts", "likes"],
|
|
67
|
+
"properties": {
|
|
68
|
+
"comments": { "type": "integer", "minimum": 0 },
|
|
69
|
+
"reposts": { "type": "integer", "minimum": 0 },
|
|
70
|
+
"likes": { "type": "integer", "minimum": 0 }
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
"resolvedLink": {
|
|
74
|
+
"description": "External URL after redirects, with page metadata.",
|
|
75
|
+
"type": "object",
|
|
76
|
+
"additionalProperties": false,
|
|
77
|
+
"required": ["url"],
|
|
78
|
+
"properties": {
|
|
79
|
+
"url": { "type": "string", "format": "uri" },
|
|
80
|
+
"title": { "type": "string" },
|
|
81
|
+
"description": { "type": "string" }
|
|
82
|
+
}
|
|
83
|
+
},
|
|
84
|
+
"post": {
|
|
85
|
+
"description": "Post payload stored in posts under its canonical href. The references and thread fields are lists of hrefs that point back into posts.",
|
|
86
|
+
"type": "object",
|
|
87
|
+
"additionalProperties": false,
|
|
88
|
+
"required": ["stats"],
|
|
89
|
+
"properties": {
|
|
90
|
+
"author": {
|
|
91
|
+
"description": "Handle of the post author.",
|
|
92
|
+
"type": "string",
|
|
93
|
+
"minLength": 1
|
|
94
|
+
},
|
|
95
|
+
"timestamp": {
|
|
96
|
+
"description": "ISO 8601 timestamp of when the post was published.",
|
|
97
|
+
"type": "string",
|
|
98
|
+
"format": "date-time"
|
|
99
|
+
},
|
|
100
|
+
"stats": { "$ref": "#/$defs/stats" },
|
|
101
|
+
"body": {
|
|
102
|
+
"description": "Post text as markdown. Omitted for reposts without custom text.",
|
|
103
|
+
"type": "string"
|
|
104
|
+
},
|
|
105
|
+
"thread": {
|
|
106
|
+
"description": "Hrefs of ancestor posts in the same thread, ordered root-first; each href is a key in posts.",
|
|
107
|
+
"type": "array",
|
|
108
|
+
"items": { "$ref": "#/$defs/postHref" }
|
|
109
|
+
},
|
|
110
|
+
"links": {
|
|
111
|
+
"description": "External links from the body, resolved with title and description.",
|
|
112
|
+
"type": "array",
|
|
113
|
+
"items": { "$ref": "#/$defs/resolvedLink" }
|
|
114
|
+
},
|
|
115
|
+
"references": {
|
|
116
|
+
"description": "Hrefs of quoted or otherwise referenced posts; each href is a key in posts.",
|
|
117
|
+
"type": "array",
|
|
118
|
+
"items": { "$ref": "#/$defs/postHref" }
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|