social-autoposter 1.6.59 → 1.6.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/mcp/dist/version.json +2 -2
- package/package.json +1 -1
- package/scripts/scan_x_profile.py +23 -17
package/mcp/dist/version.json
CHANGED
package/package.json
CHANGED
|
@@ -233,25 +233,33 @@ _TIMELINE_JS_TMPL = r"""(function(){
|
|
|
233
233
|
})()"""
|
|
234
234
|
|
|
235
235
|
|
|
236
|
-
def scrape_timeline(send, me: str, want: int, max_scrolls: int = 16,
|
|
236
|
+
def scrape_timeline(send, me: str, want: int, max_scrolls: int = 30,
|
|
237
237
|
exclude_ids: "set | None" = None) -> list:
|
|
238
238
|
"""Scroll the current timeline, collecting up to `want` of the user's OWN
|
|
239
239
|
authored articles (in DOM order = newest first). `exclude_ids` drops items
|
|
240
240
|
already captured elsewhere — that's how the comments pass (/with_replies)
|
|
241
241
|
subtracts the original posts to leave just replies. We do NOT rely on a
|
|
242
242
|
'Replying to' header: the profile /with_replies timeline doesn't render one
|
|
243
|
-
per article, so post-vs-reply is decided by set subtraction, not DOM text.
|
|
243
|
+
per article, so post-vs-reply is decided by set subtraction, not DOM text.
|
|
244
|
+
|
|
245
|
+
End-of-feed is detected by COLLECTED-COUNT STALL, not scrollHeight: x.com
|
|
246
|
+
virtualizes the timeline (unloads off-screen articles and keeps total height
|
|
247
|
+
~constant while swapping content), so scrollHeight plateaus even mid-feed and
|
|
248
|
+
would false-trigger an early stop. We instead stop when no NEW item has been
|
|
249
|
+
captured for `STALL_LIMIT` consecutive scrolls (after a min number of scrolls),
|
|
250
|
+
scrolling to the bottom each step to force the next lazy-load batch."""
|
|
244
251
|
seen: dict[str, dict] = {}
|
|
245
252
|
exclude_ids = exclude_ids or set()
|
|
246
253
|
expr = _TIMELINE_JS_TMPL % json.dumps(me.lower())
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
for
|
|
254
|
+
STALL_LIMIT = 4
|
|
255
|
+
stall = 0
|
|
256
|
+
for n in range(max_scrolls):
|
|
250
257
|
raw = _eval(send, expr) or "[]"
|
|
251
258
|
try:
|
|
252
259
|
batch = json.loads(raw)
|
|
253
260
|
except Exception:
|
|
254
261
|
batch = []
|
|
262
|
+
before = len(seen)
|
|
255
263
|
for item in batch:
|
|
256
264
|
key = item.get("id") or item.get("url") or item.get("text", "")[:80]
|
|
257
265
|
if not key or key in seen or key in exclude_ids:
|
|
@@ -259,20 +267,18 @@ def scrape_timeline(send, me: str, want: int, max_scrolls: int = 16,
|
|
|
259
267
|
seen[key] = item
|
|
260
268
|
if len(seen) >= want:
|
|
261
269
|
break
|
|
262
|
-
#
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
h = 0
|
|
268
|
-
if h == last_h:
|
|
269
|
-
stale += 1
|
|
270
|
-
if stale >= 3:
|
|
270
|
+
# No new items this pass? Count it as a stall. Give the feed a few
|
|
271
|
+
# consecutive empty scrolls (lazy-load can lag) before declaring the end.
|
|
272
|
+
if len(seen) == before and n > 0:
|
|
273
|
+
stall += 1
|
|
274
|
+
if stall >= STALL_LIMIT:
|
|
271
275
|
break
|
|
272
276
|
else:
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
277
|
+
stall = 0
|
|
278
|
+
# Scroll to the bottom of currently-loaded content to trigger the next
|
|
279
|
+
# batch, then wait for it to render before the next read.
|
|
280
|
+
_eval(send, "window.scrollTo(0, document.documentElement.scrollHeight);")
|
|
281
|
+
time.sleep(2.0)
|
|
276
282
|
items = list(seen.values())
|
|
277
283
|
return items[:want]
|
|
278
284
|
|