spectrawl 0.6.3 → 0.6.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "spectrawl",
3
- "version": "0.6.3",
3
+ "version": "0.6.4",
4
4
  "description": "The unified web layer for AI agents. Search (8 engines), stealth browse, auth, act on 24 platforms. Self-hosted.",
5
5
  "main": "src/index.js",
6
6
  "types": "index.d.ts",
@@ -103,6 +103,73 @@ class BrowseEngine {
103
103
  * Returns a function that fetches content via alternative methods.
104
104
  */
105
105
  _getSiteOverride(url) {
106
+ // X/Twitter: articles and posts can't be browsed without login
107
+ // Fallback: xAI Responses API with x_search tool (reads X posts natively)
108
+ if ((url.includes('x.com/') || url.includes('twitter.com/')) && url.includes('/status/')) {
109
+ return async (originalUrl, opts) => {
110
+ const xaiKey = process.env.XAI_API_KEY
111
+ if (!xaiKey) return null // no key, fall through to normal browse
112
+
113
+ try {
114
+ const https = require('https')
115
+ const body = JSON.stringify({
116
+ model: 'grok-4-1-fast-non-reasoning',
117
+ input: [{ role: 'user', content: `Return the FULL exact text of this X post and all replies/thread if it's a thread. Include the author's name and handle. No commentary, no analysis, just the raw content:\n\n${originalUrl}` }],
118
+ tools: [{ type: 'x_search' }]
119
+ })
120
+
121
+ const content = await new Promise((resolve, reject) => {
122
+ const req = https.request({
123
+ hostname: 'api.x.ai',
124
+ path: '/v1/responses',
125
+ method: 'POST',
126
+ headers: {
127
+ 'Content-Type': 'application/json',
128
+ 'Authorization': `Bearer ${xaiKey}`,
129
+ 'Content-Length': Buffer.byteLength(body)
130
+ },
131
+ timeout: 30000
132
+ }, res => {
133
+ let data = ''
134
+ res.on('data', c => data += c)
135
+ res.on('end', () => {
136
+ try {
137
+ const json = JSON.parse(data)
138
+ if (json.error) return resolve(null)
139
+ const output = json.output || []
140
+ for (const o of output) {
141
+ if (o.type === 'message') {
142
+ for (const c of (o.content || [])) {
143
+ if (c.text && c.text.length > 20) return resolve(c.text)
144
+ }
145
+ }
146
+ }
147
+ resolve(null)
148
+ } catch { resolve(null) }
149
+ })
150
+ })
151
+ req.on('error', () => resolve(null))
152
+ req.setTimeout(30000, () => { req.destroy(); resolve(null) })
153
+ req.write(body)
154
+ req.end()
155
+ })
156
+
157
+ if (content && content.length > 20) {
158
+ return {
159
+ content,
160
+ url: originalUrl,
161
+ title: 'X Post (via xAI)',
162
+ statusCode: 200,
163
+ cached: false,
164
+ engine: 'xai-x-search',
165
+ blocked: false
166
+ }
167
+ }
168
+ } catch (e) { /* fall through */ }
169
+ return null // fall through to normal browse
170
+ }
171
+ }
172
+
106
173
  // Reddit: datacenter IPs are fully blocked (browse, JSON, RSS all fail)
107
174
  // Fallback: PullPush API (free Reddit archive, no auth, no IP block)
108
175
  if (url.includes('reddit.com')) {