fieldtheory 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,186 @@
1
+ import path from 'node:path';
2
+ import { createHash } from 'node:crypto';
3
+ import { writeFile } from 'node:fs/promises';
4
+ import { ensureDir, pathExists, readJson, readJsonLines, writeJson } from './fs.js';
5
+ import { bookmarkMediaDir, bookmarkMediaManifestPath, twitterBookmarksCachePath } from './paths.js';
6
/**
 * Choose the file extension used when storing a downloaded media file.
 *
 * The content type is checked first (case-insensitively) against the known
 * media kinds. If none matches, the extension is taken from the path of the
 * source URL, reduced to its leading run of ASCII letters and digits so that
 * suffixes such as ":large" never end up in a filename. If neither source
 * yields an extension, ".bin" is returned.
 *
 * @param {string | null | undefined} contentType - Content-Type header value, if known.
 * @param {string | null | undefined} sourceUrl - URL the media is fetched from.
 * @returns {string} The extension, including its leading dot.
 */
function sanitizeExtFromContentType(contentType, sourceUrl) {
    // Media types are case-insensitive, so normalise before matching.
    const type = contentType?.toLowerCase() ?? '';
    if (type.includes('jpeg'))
        return '.jpg';
    if (type.includes('png'))
        return '.png';
    if (type.includes('gif'))
        return '.gif';
    if (type.includes('webp'))
        return '.webp';
    if (type.includes('mp4'))
        return '.mp4';
    try {
        const rawExt = path.extname(new URL(sourceUrl ?? '').pathname);
        // Keep at most 16 alphanumeric characters after the dot; a bare "."
        // or an extension starting with another character is rejected.
        const safeExt = /^\.[A-Za-z0-9]{1,16}/.exec(rawExt)?.[0];
        if (safeExt)
            return safeExt;
    }
    catch {
        // Missing or unparsable URL: fall through to the generic extension.
    }
    return '.bin';
}
25
/**
 * Read the bookmark media manifest from the path given by
 * bookmarkMediaManifestPath().
 *
 * @returns {Promise<object | null>} The parsed manifest, or null when no
 *   manifest file exists at that path.
 */
async function loadManifest() {
    const file = bookmarkMediaManifestPath();
    const exists = await pathExists(file);
    return exists ? readJson(file) : null;
}
31
/**
 * Build one manifest entry: the bookmark's identifying fields, the source URL,
 * the outcome-specific details, and the fetch timestamp (in that key order).
 */
function buildMediaEntry(bookmark, sourceUrl, fetchedAt, details) {
    return {
        bookmarkId: bookmark.id,
        tweetId: bookmark.tweetId,
        tweetUrl: bookmark.url,
        authorHandle: bookmark.authorHandle,
        authorName: bookmark.authorName,
        sourceUrl,
        ...details,
        fetchedAt,
    };
}

/**
 * List the media URLs to fetch for one bookmark: mediaObjects are preferred
 * (richer, includes video variants), falling back to media[]; the author's
 * profile image, upgraded to 400x400, is appended last.
 */
function resolveBookmarkMediaUrls(bookmark) {
    const mediaUrls = [];
    if (bookmark.mediaObjects?.length) {
        for (const mo of bookmark.mediaObjects) {
            if (mo.type === 'video' || mo.type === 'animated_gif') {
                // Highest-bitrate MP4 variant wins; variants without a URL are ignored.
                const mp4s = (mo.variants ?? [])
                    .filter((v) => v.contentType === 'video/mp4' && v.url)
                    .sort((a, b) => (b.bitrate ?? 0) - (a.bitrate ?? 0));
                if (mp4s.length > 0) {
                    mediaUrls.push(mp4s[0].url);
                    continue;
                }
            }
            // Photos, and videos without a usable MP4 variant, use the plain media URL.
            if (mo.mediaUrl)
                mediaUrls.push(mo.mediaUrl);
        }
    }
    else {
        mediaUrls.push(...(bookmark.media ?? []));
    }
    if (bookmark.authorProfileImageUrl) {
        mediaUrls.push(bookmark.authorProfileImageUrl.replace('_normal.', '_400x400.'));
    }
    return mediaUrls;
}

/**
 * Fetch a single media URL into mediaDir and describe the outcome as a
 * manifest entry. Never throws: any error becomes a 'failed' entry.
 */
async function fetchMediaEntry(bookmark, sourceUrl, mediaDir, maxBytes) {
    const fetchedAt = new Date().toISOString();
    try {
        // HEAD first so an oversized file can be skipped without downloading it.
        const head = await fetch(sourceUrl, { method: 'HEAD' });
        const contentLengthHeader = head.headers.get('content-length');
        const contentType = head.headers.get('content-type') ?? undefined;
        const declaredBytes = contentLengthHeader ? Number(contentLengthHeader) : undefined;
        // A NaN content-length compares false here and falls through to the download.
        if (declaredBytes !== undefined && declaredBytes > maxBytes) {
            return buildMediaEntry(bookmark, sourceUrl, fetchedAt, {
                contentType,
                bytes: declaredBytes,
                status: 'skipped_too_large',
                reason: `content-length ${declaredBytes} exceeds max ${maxBytes}`,
            });
        }
        const response = await fetch(sourceUrl);
        if (!response.ok) {
            return buildMediaEntry(bookmark, sourceUrl, fetchedAt, {
                status: 'failed',
                reason: `HTTP ${response.status}`,
            });
        }
        const buffer = Buffer.from(await response.arrayBuffer());
        const finalContentType = response.headers.get('content-type') ?? contentType;
        // The declared length may be absent or wrong, so re-check the real size.
        if (buffer.byteLength > maxBytes) {
            return buildMediaEntry(bookmark, sourceUrl, fetchedAt, {
                contentType: finalContentType,
                bytes: buffer.byteLength,
                status: 'skipped_too_large',
                reason: `downloaded size ${buffer.byteLength} exceeds max ${maxBytes}`,
            });
        }
        // File name: tweet id plus the first 16 hex characters of the content's SHA-256.
        const digest = createHash('sha256').update(buffer).digest('hex').slice(0, 16);
        const ext = sanitizeExtFromContentType(finalContentType, sourceUrl);
        const localPath = path.join(mediaDir, `${bookmark.tweetId}-${digest}${ext}`);
        await writeFile(localPath, buffer);
        return buildMediaEntry(bookmark, sourceUrl, fetchedAt, {
            localPath,
            contentType: finalContentType,
            bytes: buffer.byteLength,
            status: 'downloaded',
        });
    }
    catch (error) {
        return buildMediaEntry(bookmark, sourceUrl, fetchedAt, {
            status: 'failed',
            reason: error instanceof Error ? error.message : String(error),
        });
    }
}

/**
 * Download media for cached bookmarks and write an updated manifest.
 *
 * Bookmarks are read from the path given by twitterBookmarksCachePath(); the
 * first `limit` bookmarks that have media or an author profile image are
 * considered. Each (bookmark id, source URL) pair is handled at most once:
 * pairs already present in the existing manifest, whatever their status, and
 * pairs already handled earlier in this run are skipped. Entries from the
 * existing manifest are carried over; the counters describe this run only.
 *
 * @param {object} [options]
 * @param {number} [options.limit=100] - Maximum number of candidate bookmarks.
 * @param {number} [options.maxBytes=52428800] - Largest file, in bytes, that is kept.
 * @returns {Promise<object>} The manifest that was written to bookmarkMediaManifestPath().
 */
export async function fetchBookmarkMediaBatch(options = {}) {
    const limit = options.limit ?? 100;
    const maxBytes = options.maxBytes ?? 50 * 1024 * 1024;
    const mediaDir = bookmarkMediaDir();
    const manifestPath = bookmarkMediaManifestPath();
    await ensureDir(mediaDir);
    const bookmarks = await readJsonLines(twitterBookmarksCachePath());
    const candidates = bookmarks
        .filter((b) => (b.media?.length ?? 0) > 0 || (b.mediaObjects?.length ?? 0) > 0 || b.authorProfileImageUrl)
        .slice(0, limit);
    const previous = await loadManifest();
    const entries = previous?.entries ? [...previous.entries] : [];
    const seenKeys = new Set(entries.map((e) => `${e.bookmarkId}::${e.sourceUrl}`));
    let downloaded = 0;
    let skippedTooLarge = 0;
    let failed = 0;
    let processed = 0;
    for (const bookmark of candidates) {
        for (const sourceUrl of resolveBookmarkMediaUrls(bookmark)) {
            const key = `${bookmark.id}::${sourceUrl}`;
            if (seenKeys.has(key))
                continue;
            // Remember the key immediately so a duplicate bookmark line or a
            // repeated URL later in this run is not fetched and recorded twice.
            seenKeys.add(key);
            processed += 1;
            const entry = await fetchMediaEntry(bookmark, sourceUrl, mediaDir, maxBytes);
            entries.push(entry);
            if (entry.status === 'downloaded')
                downloaded += 1;
            else if (entry.status === 'skipped_too_large')
                skippedTooLarge += 1;
            else
                failed += 1;
        }
    }
    const manifest = {
        schemaVersion: 1,
        generatedAt: new Date().toISOString(),
        limit,
        maxBytes,
        processed,
        downloaded,
        skippedTooLarge,
        failed,
        entries,
    };
    await writeJson(manifestPath, manifest);
    return manifest;
}