triangle-utils 1.4.12 → 1.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/UtilsNitter.d.ts +20 -0
- package/dist/src/UtilsNitter.js +136 -0
- package/dist/src/UtilsXAI.d.ts +24 -0
- package/dist/src/UtilsXAI.js +100 -0
- package/dist/src/index.d.ts +7 -1
- package/dist/src/index.js +9 -1
- package/package.json +4 -1
- package/src/UtilsNitter.ts +148 -0
- package/src/UtilsXAI.ts +117 -0
- package/src/index.ts +9 -1
- package/tsconfig.json +1 -1
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { UtilsBee } from "./UtilsBee.js";
|
|
2
|
+
/**
 * Immutable record describing one tweet, as produced by the Nitter parser
 * in this package.
 */
export declare class Tweet {
    /** Sortable key: first 16 characters of `tweet_time`, a `|`, then the status number zero-padded to 20 digits. */
    readonly tweet_id: string;
    /** Account name taken from the second path segment of `url`. */
    readonly username: string;
    /** Href of the item's `.tweet-link` with the `#m` fragment removed. */
    readonly url: string;
    /** Tweet timestamp as an ISO-8601 string (`Date.prototype.toISOString`). */
    readonly tweet_time: string;
    /** Inner HTML of `.tweet-content` with all tags stripped. */
    readonly tweet_content: string;
    /** Tag-stripped, whitespace-normalised text of `.quote-big`; absent when the tweet quotes nothing. */
    readonly quote?: string;
    /** Raw inner HTML of the timeline item. */
    readonly html: string;
    /** Human-readable summary: header line, content and optional quote. */
    readonly text: string;
    /**
     * Copies the fields of `tweet` after validating them with {@link Tweet.is}.
     * @throws Error("Invalid input.") when validation fails.
     */
    constructor(tweet: any);
    /** Type guard: checks that every required field is a string and `quote` is a string or undefined. */
    static is(tweet: any): tweet is Tweet;
}
|
|
14
|
+
/**
 * Timeline scraper for nitter.net built on top of {@link UtilsBee}.
 */
export declare class UtilsNitter extends UtilsBee {
    /** Forwards the key to the `UtilsBee` constructor. */
    constructor(scraping_bee_api_key: string | undefined);
    /**
     * Collects tweets of `username`, following the "show more" link page by page.
     *
     * Paging stops when there is no further page or when a page contains a
     * tweet that sorts before `min_tweet_id` / `min_tweet_time`; tweets of
     * that last page are still returned. Resolves to an empty array when a
     * page cannot be fetched after all retries.
     */
    get_tweets(username: string, options?: {
        min_tweet_id?: string;
        min_tweet_time?: string;
    }): Promise<Tweet[]>;
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { JSDOM } from "jsdom";
|
|
2
|
+
import { UtilsBee } from "./UtilsBee.js";
|
|
3
|
+
import { UtilsMisc } from "./UtilsMisc.js";
|
|
4
|
+
/**
 * Immutable record describing one tweet.
 *
 * The constructor validates its argument with `Tweet.is` and copies the
 * known fields, so extra properties on the input are dropped.
 */
export class Tweet {
    tweet_id;
    username;
    url;
    tweet_time;
    tweet_content;
    quote;
    html;
    text;
    /**
     * @param tweet Candidate object carrying the tweet fields.
     * @throws Error("Invalid input.") when `tweet` fails `Tweet.is`.
     */
    constructor(tweet) {
        if (!Tweet.is(tweet)) {
            throw Error("Invalid input.");
        }
        this.tweet_id = tweet.tweet_id;
        this.username = tweet.username;
        this.url = tweet.url;
        this.tweet_time = tweet.tweet_time;
        this.tweet_content = tweet.tweet_content;
        this.quote = tweet.quote;
        this.html = tweet.html;
        this.text = tweet.text;
    }
    /**
     * Type guard for tweet-shaped values.
     *
     * @param tweet Any value, including `null` and `undefined`.
     * @returns `true` when every required field is a string and `quote`
     *          is either undefined or a string.
     */
    static is(tweet) {
        // null must be rejected up front: reading a property of null throws.
        return (tweet !== undefined &&
            tweet !== null &&
            typeof tweet.tweet_id === "string" &&
            typeof tweet.username === "string" &&
            typeof tweet.url === "string" &&
            typeof tweet.tweet_time === "string" &&
            typeof tweet.tweet_content === "string" &&
            (tweet.quote === undefined || typeof tweet.quote === "string") &&
            (typeof tweet.html === "string") &&
            (typeof tweet.text === "string"));
    }
}
|
|
38
|
+
function parse_tweet_element(tweet_element) {
|
|
39
|
+
if (tweet_element.className.includes("show-more")) {
|
|
40
|
+
return undefined;
|
|
41
|
+
}
|
|
42
|
+
const tweet_link = tweet_element.querySelector(".tweet-link");
|
|
43
|
+
if (tweet_link === null) {
|
|
44
|
+
return undefined;
|
|
45
|
+
}
|
|
46
|
+
const url_element = tweet_link.getAttribute("href");
|
|
47
|
+
const url = url_element !== null ? url_element.replace("#m", "") : undefined;
|
|
48
|
+
if (url === undefined) {
|
|
49
|
+
return undefined;
|
|
50
|
+
}
|
|
51
|
+
const username = url.split("/")[1];
|
|
52
|
+
const tweet_number = url.split("/")[3].padStart(20, "0");
|
|
53
|
+
const tweet_date = tweet_element.querySelector(".tweet-date")?.querySelector("a")?.getAttribute("title")?.replace(" · ", " ");
|
|
54
|
+
if (tweet_date === undefined) {
|
|
55
|
+
return undefined;
|
|
56
|
+
}
|
|
57
|
+
const tweet_time = (new Date(tweet_date)).toISOString();
|
|
58
|
+
const tweet_id = tweet_time.substring(0, 16) + "|" + tweet_number;
|
|
59
|
+
const tweet_content = tweet_element.querySelector(".tweet-content")?.innerHTML?.replaceAll(/\<[^\>]+\>/g, "");
|
|
60
|
+
if (tweet_content === undefined) {
|
|
61
|
+
return undefined;
|
|
62
|
+
}
|
|
63
|
+
const tweet_quote = tweet_element.querySelector(".quote-big");
|
|
64
|
+
const quote = tweet_quote !== null ?
|
|
65
|
+
tweet_quote.innerHTML
|
|
66
|
+
.replaceAll(/\<[^\>]+\>/g, "")
|
|
67
|
+
.replaceAll(/[^\n\S\r]+/g, " ").split("\n").map(line => line.trim()).filter(line => line !== "").join("\n")
|
|
68
|
+
:
|
|
69
|
+
undefined;
|
|
70
|
+
const text = "Tweet from @" + username + " on " + tweet_time + ":\n\n" +
|
|
71
|
+
tweet_content +
|
|
72
|
+
(quote !== undefined ? ("\n\nQuote:\n\n" + quote) : "");
|
|
73
|
+
const html = tweet_element.innerHTML;
|
|
74
|
+
const tweet = {
|
|
75
|
+
tweet_id: tweet_id,
|
|
76
|
+
username: username,
|
|
77
|
+
url: url,
|
|
78
|
+
tweet_time: tweet_time,
|
|
79
|
+
tweet_content: tweet_content,
|
|
80
|
+
quote: quote,
|
|
81
|
+
html: html,
|
|
82
|
+
text: text
|
|
83
|
+
};
|
|
84
|
+
return tweet;
|
|
85
|
+
}
|
|
86
|
+
/**
 * Parses one Nitter timeline page.
 *
 * @param nitter_html Raw HTML of the page.
 * @returns `tweets`: every timeline item that could be parsed, in document
 *          order; `next_nitter_query`: href of the "show more" link, or
 *          `undefined` when there is none (or it is empty).
 */
function parse_nitter_html(nitter_html) {
    const body = new JSDOM(nitter_html).window.document.body;
    const tweets = [];
    for (const item of Array.from(body.querySelectorAll(".timeline-item"))) {
        const parsed = parse_tweet_element(item);
        if (parsed !== undefined) {
            tweets.push(parsed);
        }
    }
    // The pager is a .show-more element that is not itself a timeline item.
    const pager = body.querySelector(".show-more:not(.timeline-item)");
    const pager_link = pager === null ? null : pager.querySelector("a");
    const href = pager_link === null ? null : pager_link.getAttribute("href");
    return { tweets: tweets, next_nitter_query: href || undefined };
}
|
|
96
|
+
/**
 * Timeline scraper for nitter.net; pages are fetched with the inherited
 * `get` method of `UtilsBee`.
 */
export class UtilsNitter extends UtilsBee {
    /**
     * @param scraping_bee_api_key Key forwarded to the `UtilsBee` constructor.
     */
    constructor(scraping_bee_api_key) {
        super(scraping_bee_api_key);
    }
    /**
     * Collects tweets of `username`, following the "show more" link page by page.
     *
     * @param username Account name appended to `https://nitter.net/`.
     * @param options  Optional lower bounds. Paging stops after the first
     *                 page containing a tweet that sorts before
     *                 `min_tweet_id` or `min_tweet_time`; the tweets of that
     *                 page, including older ones, are still returned.
     * @returns All collected tweets, or an empty array when any page could
     *          not be fetched after 10 attempts (tweets gathered from
     *          earlier pages are discarded in that case).
     */
    async get_tweets(username, options) {
        const max_attempts = 10;
        const min_tweet_id = options !== undefined ? options.min_tweet_id : undefined;
        const min_tweet_time = options !== undefined ? options.min_tweet_time : undefined;
        console.log("Querying Twitter: @" + username, "min_tweet_id:", min_tweet_id, "min_tweet_time:", min_tweet_time);
        let nitter_query = "";
        const tweets = [];
        while (nitter_query !== undefined) {
            const nitter_url = "https://nitter.net/" + username + nitter_query;
            console.log(nitter_url);
            let nitter_html = undefined;
            for (let i = 0; i < max_attempts; i++) {
                nitter_html = await this.get(nitter_url, { render_js: false });
                if (nitter_html !== undefined && nitter_html !== "") {
                    break;
                }
                // Back off only when another attempt follows; sleeping after
                // the final failure would just delay the error path.
                if (i < max_attempts - 1) {
                    console.log("Failed to query Nitter, trying again.");
                    await UtilsMisc.wait((2 + 2 * i) * 1000);
                }
            }
            if (nitter_html === undefined || nitter_html === "") {
                console.log("Nitter failed for url", nitter_url);
                return [];
            }
            const nitter_data = parse_nitter_html(nitter_html);
            nitter_query = nitter_data.next_nitter_query;
            for (const tweet of nitter_data.tweets) {
                // Reaching a tweet below either bound ends paging after this page.
                if (min_tweet_id !== undefined && tweet.tweet_id.localeCompare(min_tweet_id) < 0) {
                    nitter_query = undefined;
                }
                if (min_tweet_time !== undefined && tweet.tweet_time.localeCompare(min_tweet_time) < 0) {
                    nitter_query = undefined;
                }
                tweets.push(tweet);
            }
        }
        return tweets;
    }
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** One conversation turn sent to Grok as part of the request `input`. */
export interface GrokMessage {
    /** Speaker of the turn. */
    role: "system" | "user" | "assistant";
    /** Text of the turn. */
    content: string;
}
|
|
5
|
+
/**
 * A source annotation attached to a Grok answer.
 */
export declare class GrokSource {
    readonly title: string;
    readonly url: string;
    // NOTE(review): presumably character offsets into the answer text — confirm against the xAI API.
    readonly start_index: number;
    readonly end_index: number;
    readonly type: string;
    /**
     * Copies the fields of `grok_source` after validating them with {@link GrokSource.is}.
     * @throws Error("Invalid input.") when validation fails.
     */
    constructor(grok_source: any);
    /** Type guard: `title`, `url`, `type` must be strings; `start_index`, `end_index` numbers. */
    static is(grok_source: any): grok_source is GrokSource;
}
|
|
14
|
+
/** Result of a successful `UtilsXAI.grok_query` call. */
export interface GrokResponse {
    /** Answer text returned by the model. */
    text: string;
    /** Validated annotations of the answer; empty when none were usable. */
    sources: GrokSource[];
}
|
|
18
|
+
/**
 * Thin client for the xAI responses endpoint.
 */
export declare class UtilsXAI {
    private readonly xai_api_key;
    /** @param xai_api_key API key; when undefined every query resolves to undefined. */
    constructor(xai_api_key: string | undefined);
    /**
     * Sends `conversation_history` followed by `prompt` as a user message to `model`.
     *
     * Resolves to `undefined` when no API key is configured or when all
     * retries fail. With `require_sources` set, answers without any valid
     * source annotation are treated as failures.
     */
    grok_query(model: string, prompt: string, conversation_history?: GrokMessage[], options?: {
        require_sources: boolean;
    }): Promise<GrokResponse | undefined>;
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
 * A source annotation attached to a Grok answer.
 *
 * The constructor validates its argument with `GrokSource.is` and copies
 * the known fields.
 */
export class GrokSource {
    title;
    url;
    start_index;
    end_index;
    type;
    /**
     * @param grok_source Candidate annotation object.
     * @throws Error("Invalid input.") when `grok_source` fails `GrokSource.is`.
     */
    constructor(grok_source) {
        if (!GrokSource.is(grok_source)) {
            throw Error("Invalid input.");
        }
        this.title = grok_source.title;
        this.url = grok_source.url;
        this.start_index = grok_source.start_index;
        this.end_index = grok_source.end_index;
        this.type = grok_source.type;
    }
    /**
     * Type guard for source-shaped values.
     *
     * @param grok_source Any value, including `null` and `undefined`.
     * @returns `true` when `title`, `url` and `type` are strings and
     *          `start_index` and `end_index` are numbers.
     */
    static is(grok_source) {
        // null must be rejected up front: reading a property of null throws.
        return (grok_source !== undefined &&
            grok_source !== null &&
            typeof grok_source.title === "string" &&
            typeof grok_source.url === "string" &&
            typeof grok_source.start_index === "number" &&
            typeof grok_source.end_index === "number" &&
            typeof grok_source.type === "string");
    }
}
|
|
26
|
+
/**
 * Thin client for the xAI responses endpoint.
 */
export class UtilsXAI {
    xai_api_key;
    /**
     * @param xai_api_key API key; when undefined every query resolves to undefined.
     */
    constructor(xai_api_key) {
        this.xai_api_key = xai_api_key;
    }
    /**
     * Sends `conversation_history` followed by `prompt` (as a user message)
     * to `model` with the `web_search` tool enabled.
     *
     * Each of up to 3 rounds fires 3 identical requests in parallel and
     * takes the first one that yields an acceptable answer. An answer is
     * rejected when the HTTP status is not OK, the payload has an
     * unexpected shape, the text contains `<`, `>` or `[web:`, or when
     * `require_sources` is set and no valid annotations were returned.
     *
     * @param model                 Model identifier passed to the API.
     * @param prompt                Text of the final user message.
     * @param conversation_history  Earlier messages, sent before `prompt`.
     * @param options               `require_sources`: reject source-less answers.
     * @returns The answer text with its sources, or `undefined` when no API
     *          key is configured or every round failed.
     */
    async grok_query(model, prompt, conversation_history = [], options) {
        const require_sources = options !== undefined ? options.require_sources : false;
        if (this.xai_api_key === undefined) {
            return undefined;
        }
        const messages = [
            ...conversation_history,
            { role: "user", content: prompt }
        ];
        for (let i = 0; i < 3; i++) {
            try {
                const grok_response = await Promise.any([0, 1, 2].map(async () => {
                    const response = await fetch("https://api.x.ai/v1/responses", {
                        method: "POST",
                        body: JSON.stringify({
                            model: model,
                            input: messages,
                            tools: [
                                {
                                    type: "web_search"
                                }
                            ]
                        }),
                        headers: {
                            "Accept": "application/json",
                            "Content-type": "application/json",
                            "Authorization": "Bearer " + this.xai_api_key
                        }
                    });
                    // Surface HTTP failures explicitly instead of as a parse error.
                    if (!response.ok) {
                        throw Error("Grok request failed with HTTP status " + response.status + ".");
                    }
                    const data = await response.json();
                    const output = data.output;
                    if (output === undefined || !Array.isArray(output) || output.length === 0) {
                        throw Error("Bad response from Grok.");
                    }
                    // The answer is read from the last output item.
                    const content = output.at(-1).content;
                    if (content === undefined || !Array.isArray(content) || content[0] === undefined) {
                        throw Error("Bad response from Grok.");
                    }
                    const grok_message = content[0];
                    const text = grok_message.text;
                    if (typeof text !== "string") {
                        throw Error("Bad response from Grok.");
                    }
                    if (text.includes("<") || text.includes(">")) {
                        throw Error("Response contains tags.");
                    }
                    if (text.includes("[web:")) {
                        throw Error("Response contains [web:].");
                    }
                    const sources = [];
                    // Annotations are accepted only if every entry is well-formed.
                    if (Array.isArray(grok_message.annotations) && grok_message.annotations.every(GrokSource.is)) {
                        sources.push(...grok_message.annotations);
                    }
                    if (require_sources && sources.length === 0) {
                        throw Error("Bad sources.");
                    }
                    return {
                        text: text,
                        sources: sources
                    };
                }));
                return grok_response;
            }
            catch (error) {
                if (error instanceof Error) {
                    console.log(error.stack);
                }
                // Promise.any rejects with an AggregateError whose own stack
                // says nothing about why the individual requests failed.
                if (error instanceof AggregateError) {
                    for (const cause of error.errors) {
                        console.log(cause instanceof Error ? cause.stack : cause);
                    }
                }
                console.log("Failed to get from Grok.");
            }
        }
        return undefined;
    }
}
|
package/dist/src/index.d.ts
CHANGED
|
@@ -7,6 +7,8 @@ import { UtilsS3Vectors } from "./UtilsS3Vectors.js";
|
|
|
7
7
|
import { UtilsCognito } from "./UtilsCognito.js";
|
|
8
8
|
import { UtilsMisc } from "./UtilsMisc.js";
|
|
9
9
|
import { UtilsYoutube } from "./UtilsYoutube.js";
|
|
10
|
+
import { UtilsXAI } from "./UtilsXAI.js";
|
|
11
|
+
import { UtilsNitter } from "./UtilsNitter.js";
|
|
10
12
|
export declare class TriangleUtils extends UtilsMisc {
|
|
11
13
|
readonly dynamodb: UtilsDynamoDB;
|
|
12
14
|
readonly s3: UtilsS3;
|
|
@@ -15,6 +17,8 @@ export declare class TriangleUtils extends UtilsMisc {
|
|
|
15
17
|
readonly cognito: UtilsCognito;
|
|
16
18
|
readonly bee: UtilsBee;
|
|
17
19
|
readonly youtube: UtilsYoutube;
|
|
20
|
+
readonly xai: UtilsXAI;
|
|
21
|
+
readonly nitter: UtilsNitter;
|
|
18
22
|
constructor(config: TriangleUtilsConfig);
|
|
19
23
|
}
|
|
20
24
|
export * from "./types/TriangleUtilsConfig.js";
|
|
@@ -23,6 +27,8 @@ export * from "./UtilsDynamoDB.js";
|
|
|
23
27
|
export * from "./UtilsS3.js";
|
|
24
28
|
export * from "./UtilsBedrock.js";
|
|
25
29
|
export * from "./UtilsBee.js";
|
|
26
|
-
export * from "./UtilsS3Vectors.js";
|
|
27
30
|
export * from "./UtilsCognito.js";
|
|
31
|
+
export * from "./UtilsS3Vectors.js";
|
|
32
|
+
export * from "./UtilsNitter.js";
|
|
33
|
+
export * from "./UtilsXAI.js";
|
|
28
34
|
export * from "./UtilsYoutube.js";
|
package/dist/src/index.js
CHANGED
|
@@ -6,6 +6,8 @@ import { UtilsS3Vectors } from "./UtilsS3Vectors.js";
|
|
|
6
6
|
import { UtilsCognito } from "./UtilsCognito.js";
|
|
7
7
|
import { UtilsMisc } from "./UtilsMisc.js";
|
|
8
8
|
import { UtilsYoutube } from "./UtilsYoutube.js";
|
|
9
|
+
import { UtilsXAI } from "./UtilsXAI.js";
|
|
10
|
+
import { UtilsNitter } from "./UtilsNitter.js";
|
|
9
11
|
export class TriangleUtils extends UtilsMisc {
|
|
10
12
|
dynamodb;
|
|
11
13
|
s3;
|
|
@@ -14,6 +16,8 @@ export class TriangleUtils extends UtilsMisc {
|
|
|
14
16
|
cognito;
|
|
15
17
|
bee;
|
|
16
18
|
youtube;
|
|
19
|
+
xai;
|
|
20
|
+
nitter;
|
|
17
21
|
constructor(config) {
|
|
18
22
|
super(config);
|
|
19
23
|
this.dynamodb = new UtilsDynamoDB(config.region);
|
|
@@ -23,6 +27,8 @@ export class TriangleUtils extends UtilsMisc {
|
|
|
23
27
|
this.cognito = new UtilsCognito(config.region);
|
|
24
28
|
this.bee = new UtilsBee(config.scraping_bee_api_key);
|
|
25
29
|
this.youtube = new UtilsYoutube(config.youtube_api_key);
|
|
30
|
+
this.xai = new UtilsXAI(config.xai_api_key);
|
|
31
|
+
this.nitter = new UtilsNitter(config.scraping_bee_api_key);
|
|
26
32
|
}
|
|
27
33
|
}
|
|
28
34
|
export * from "./types/TriangleUtilsConfig.js";
|
|
@@ -31,6 +37,8 @@ export * from "./UtilsDynamoDB.js";
|
|
|
31
37
|
export * from "./UtilsS3.js";
|
|
32
38
|
export * from "./UtilsBedrock.js";
|
|
33
39
|
export * from "./UtilsBee.js";
|
|
34
|
-
export * from "./UtilsS3Vectors.js";
|
|
35
40
|
export * from "./UtilsCognito.js";
|
|
41
|
+
export * from "./UtilsS3Vectors.js";
|
|
42
|
+
export * from "./UtilsNitter.js";
|
|
43
|
+
export * from "./UtilsXAI.js";
|
|
36
44
|
export * from "./UtilsYoutube.js";
|
package/package.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "triangle-utils",
|
|
3
|
-
"version": "1.4.12",
|
|
3
|
+
"version": "1.4.14",
|
|
4
4
|
"main": "dist/src/index.js",
|
|
5
|
+
"types": "dist/src/index.d.ts",
|
|
5
6
|
"directories": {
|
|
6
7
|
"test": "vitest"
|
|
7
8
|
},
|
|
@@ -21,9 +22,11 @@
|
|
|
21
22
|
"@aws-sdk/client-s3vectors": "^3.953.0",
|
|
22
23
|
"@aws-sdk/client-secrets-manager": "^3.965.0",
|
|
23
24
|
"@aws-sdk/s3-request-presigner": "^3.953.0",
|
|
25
|
+
"@types/jsdom": "^28.0.1",
|
|
24
26
|
"@types/node": "^25.2.3",
|
|
25
27
|
"@types/nodemailer": "^7.0.9",
|
|
26
28
|
"googleapis": "^170.0.0",
|
|
29
|
+
"jsdom": "^29.0.1",
|
|
27
30
|
"nodemailer": "^7.0.11",
|
|
28
31
|
"scrapingbee": "^1.8.2"
|
|
29
32
|
},
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import { JSDOM } from "jsdom"
|
|
2
|
+
|
|
3
|
+
import { UtilsBee } from "./UtilsBee"
|
|
4
|
+
import { UtilsMisc } from "./UtilsMisc"
|
|
5
|
+
|
|
6
|
+
/**
 * Immutable record describing one tweet.
 *
 * The constructor validates its argument with `Tweet.is` and copies the
 * known fields, so extra properties on the input are dropped.
 */
export class Tweet {
    readonly tweet_id : string
    readonly username : string
    readonly url : string
    readonly tweet_time : string
    readonly tweet_content : string
    readonly quote? : string
    readonly html : string
    readonly text : string

    /**
     * @param tweet Candidate object carrying the tweet fields.
     * @throws Error("Invalid input.") when `tweet` fails `Tweet.is`.
     */
    constructor(tweet : any) {
        if (!Tweet.is(tweet)) {
            throw Error("Invalid input.")
        }
        this.tweet_id = tweet.tweet_id
        this.username = tweet.username
        this.url = tweet.url
        this.tweet_time = tweet.tweet_time
        this.tweet_content = tweet.tweet_content
        this.quote = tweet.quote
        this.html = tweet.html
        this.text = tweet.text
    }

    /**
     * Type guard for tweet-shaped values.
     *
     * @param tweet Any value, including `null` and `undefined`.
     * @returns `true` when every required field is a string and `quote`
     *          is either undefined or a string.
     */
    static is(tweet : any) : tweet is Tweet {
        // null must be rejected up front: reading a property of null throws.
        return (
            tweet !== undefined &&
            tweet !== null &&
            typeof tweet.tweet_id === "string" &&
            typeof tweet.username === "string" &&
            typeof tweet.url === "string" &&
            typeof tweet.tweet_time === "string" &&
            typeof tweet.tweet_content === "string" &&
            (tweet.quote === undefined || typeof tweet.quote === "string") &&
            (typeof tweet.html === "string") &&
            (typeof tweet.text === "string")
        )
    }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Converts one Nitter `.timeline-item` element into a plain tweet record.
 *
 * @param tweet_element DOM element of a timeline item.
 * @returns The tweet record, or `undefined` when the element is a
 *          "show more" row or lacks a usable link, date or content.
 *          Malformed links and unparseable dates also yield `undefined`
 *          instead of throwing, so one bad item cannot abort a whole page.
 */
function parse_tweet_element(tweet_element : Element) : Tweet | undefined {
    // "show more" rows share the timeline-item class but carry no tweet.
    if (tweet_element.className.includes("show-more")) {
        return undefined
    }
    const tweet_link = tweet_element.querySelector(".tweet-link")
    if (tweet_link === null) {
        return undefined
    }
    const href = tweet_link.getAttribute("href")
    if (href === null) {
        return undefined
    }
    const url = href.replace("#m", "")
    // The code relies on the shape "/<username>/<segment>/<number>".
    const url_parts = url.split("/")
    const username = url_parts[1]
    const raw_tweet_number = url_parts[3]
    if (!username || !raw_tweet_number) {
        return undefined
    }
    // Zero-padding makes the numeric part sort correctly as a string.
    const tweet_number = raw_tweet_number.padStart(20, "0")
    const tweet_date = tweet_element.querySelector(".tweet-date")?.querySelector("a")?.getAttribute("title")?.replace(" · ", " ")
    if (tweet_date === undefined) {
        return undefined
    }
    const parsed_date = new Date(tweet_date)
    // toISOString() throws RangeError on an invalid date, so check first.
    if (Number.isNaN(parsed_date.getTime())) {
        return undefined
    }
    const tweet_time = parsed_date.toISOString()
    // Minute-resolution timestamp plus padded number gives a sortable id.
    const tweet_id = tweet_time.substring(0, 16) + "|" + tweet_number
    const tweet_content = tweet_element.querySelector(".tweet-content")?.innerHTML?.replaceAll(/\<[^\>]+\>/g, "")
    if (tweet_content === undefined) {
        return undefined
    }
    const tweet_quote = tweet_element.querySelector(".quote-big")
    // Strip tags, collapse horizontal whitespace, drop blank lines.
    const quote = tweet_quote !== null ?
        tweet_quote.innerHTML
            .replaceAll(/\<[^\>]+\>/g, "")
            .replaceAll(/[^\n\S\r]+/g, " ").split("\n").map(line => line.trim()).filter(line => line !== "").join("\n")
        :
        undefined
    const text = "Tweet from @" + username + " on " + tweet_time + ":\n\n" +
        tweet_content +
        (quote !== undefined ? ("\n\nQuote:\n\n" + quote): "")
    const html = tweet_element.innerHTML
    const tweet : Tweet = {
        tweet_id : tweet_id,
        username : username,
        url : url,
        tweet_time : tweet_time,
        tweet_content : tweet_content,
        quote : quote,
        html : html,
        text : text
    }
    return tweet
}
|
|
93
|
+
|
|
94
|
+
/**
 * Parses one Nitter timeline page.
 *
 * @param nitter_html Raw HTML of the page.
 * @returns `tweets`: every timeline item that could be parsed, in document
 *          order; `next_nitter_query`: href of the "show more" link, or
 *          `undefined` when there is none (or it is empty).
 */
function parse_nitter_html(nitter_html : string) {
    const body = new JSDOM(nitter_html).window.document.body
    const items = Array.from(body.querySelectorAll(".timeline-item"))
    const tweets = items.map(parse_tweet_element).filter(parsed => parsed !== undefined)
    // The pager is a .show-more element that is not itself a timeline item.
    const pager = body.querySelector(".show-more:not(.timeline-item)")
    const pager_link = pager === null ? null : pager.querySelector("a")
    const href = pager_link === null ? null : pager_link.getAttribute("href")
    return { tweets : tweets, next_nitter_query : href || undefined }
}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
/**
 * Timeline scraper for nitter.net; pages are fetched with the inherited
 * `get` method of `UtilsBee`.
 */
export class UtilsNitter extends UtilsBee {

    /**
     * @param scraping_bee_api_key Key forwarded to the `UtilsBee` constructor.
     */
    constructor(scraping_bee_api_key : string | undefined) {
        super(scraping_bee_api_key)
    }

    /**
     * Collects tweets of `username`, following the "show more" link page by page.
     *
     * @param username Account name appended to `https://nitter.net/`.
     * @param options  Optional lower bounds. Paging stops after the first
     *                 page containing a tweet that sorts before
     *                 `min_tweet_id` or `min_tweet_time`; the tweets of that
     *                 page, including older ones, are still returned.
     * @returns All collected tweets, or an empty array when any page could
     *          not be fetched after 10 attempts (tweets gathered from
     *          earlier pages are discarded in that case).
     */
    async get_tweets(username : string, options? : { min_tweet_id? : string, min_tweet_time? : string }) : Promise<Tweet[]> {
        const max_attempts = 10
        const min_tweet_id = options !== undefined ? options.min_tweet_id : undefined
        const min_tweet_time = options !== undefined ? options.min_tweet_time : undefined
        console.log("Querying Twitter: @" + username, "min_tweet_id:", min_tweet_id, "min_tweet_time:", min_tweet_time)
        let nitter_query : string | undefined = ""
        const tweets = []
        while (nitter_query !== undefined) {
            const nitter_url = "https://nitter.net/" + username + nitter_query
            console.log(nitter_url)
            let nitter_html = undefined
            for (let i = 0; i < max_attempts; i++) {
                nitter_html = await this.get(nitter_url, { render_js : false })
                if (nitter_html !== undefined && nitter_html !== "") {
                    break
                }
                // Back off only when another attempt follows; sleeping after
                // the final failure would just delay the error path.
                if (i < max_attempts - 1) {
                    console.log("Failed to query Nitter, trying again.")
                    await UtilsMisc.wait((2 + 2 * i) * 1000)
                }
            }
            if (nitter_html === undefined || nitter_html === "") {
                console.log("Nitter failed for url", nitter_url)
                return []
            }
            const nitter_data = parse_nitter_html(nitter_html)
            nitter_query = nitter_data.next_nitter_query
            for (const tweet of nitter_data.tweets) {
                // Reaching a tweet below either bound ends paging after this page.
                if (min_tweet_id !== undefined && tweet.tweet_id.localeCompare(min_tweet_id) < 0) {
                    nitter_query = undefined
                }
                if (min_tweet_time !== undefined && tweet.tweet_time.localeCompare(min_tweet_time) < 0) {
                    nitter_query = undefined
                }
                tweets.push(tweet)
            }
        }
        return tweets
    }
}
|
package/src/UtilsXAI.ts
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/** One conversation turn sent to Grok as part of the request `input`. */
export interface GrokMessage {
    /** Speaker of the turn. */
    role : "system" | "user" | "assistant",
    /** Text of the turn. */
    content : string
}
|
|
5
|
+
|
|
6
|
+
/**
 * A source annotation attached to a Grok answer.
 *
 * The constructor validates its argument with `GrokSource.is` and copies
 * the known fields.
 */
export class GrokSource {
    readonly title : string
    readonly url : string
    readonly start_index : number
    readonly end_index : number
    readonly type : string

    /**
     * @param grok_source Candidate annotation object.
     * @throws Error("Invalid input.") when `grok_source` fails `GrokSource.is`.
     */
    constructor(grok_source : any) {
        if (!GrokSource.is(grok_source)) {
            throw Error("Invalid input.")
        }
        this.title = grok_source.title
        this.url = grok_source.url
        this.start_index = grok_source.start_index
        this.end_index = grok_source.end_index
        this.type = grok_source.type
    }

    /**
     * Type guard for source-shaped values.
     *
     * @param grok_source Any value, including `null` and `undefined`.
     * @returns `true` when `title`, `url` and `type` are strings and
     *          `start_index` and `end_index` are numbers.
     */
    static is(grok_source : any) : grok_source is GrokSource {
        // null must be rejected up front: reading a property of null throws.
        return (
            grok_source !== undefined &&
            grok_source !== null &&
            typeof grok_source.title === "string" &&
            typeof grok_source.url === "string" &&
            typeof grok_source.start_index === "number" &&
            typeof grok_source.end_index === "number" &&
            typeof grok_source.type === "string"
        )
    }
}
|
|
35
|
+
|
|
36
|
+
/** Result of a successful `UtilsXAI.grok_query` call. */
export interface GrokResponse {
    /** Answer text returned by the model. */
    text: string,
    /** Validated annotations of the answer; empty when none were usable. */
    sources : GrokSource[]
}
|
|
40
|
+
|
|
41
|
+
/**
 * Thin client for the xAI responses endpoint.
 */
export class UtilsXAI {

    private readonly xai_api_key : string | undefined

    /**
     * @param xai_api_key API key; when undefined every query resolves to undefined.
     */
    constructor(xai_api_key : string | undefined) {
        this.xai_api_key = xai_api_key
    }

    /**
     * Sends `conversation_history` followed by `prompt` (as a user message)
     * to `model` with the `web_search` tool enabled.
     *
     * Each of up to 3 rounds fires 3 identical requests in parallel and
     * takes the first one that yields an acceptable answer. An answer is
     * rejected when the HTTP status is not OK, the payload has an
     * unexpected shape, the text contains `<`, `>` or `[web:`, or when
     * `require_sources` is set and no valid annotations were returned.
     *
     * @param model                 Model identifier passed to the API.
     * @param prompt                Text of the final user message.
     * @param conversation_history  Earlier messages, sent before `prompt`.
     * @param options               `require_sources`: reject source-less answers.
     * @returns The answer text with its sources, or `undefined` when no API
     *          key is configured or every round failed.
     */
    async grok_query(model : string, prompt : string, conversation_history : GrokMessage[] = [], options? : { require_sources : boolean }) : Promise<GrokResponse | undefined> {
        const require_sources = options !== undefined ? options.require_sources : false
        if (this.xai_api_key === undefined) {
            return undefined
        }
        const messages : GrokMessage[] = [
            ...conversation_history,
            { role : "user", content : prompt }
        ]
        for (let i = 0; i < 3; i++) {
            try {
                const grok_response = await Promise.any([0, 1, 2].map(async () => {
                    const response = await fetch("https://api.x.ai/v1/responses", {
                        method: "POST",
                        body : JSON.stringify({
                            model: model,
                            input: messages,
                            tools : [
                                {
                                    type : "web_search"
                                }
                            ]
                        }),
                        headers: {
                            "Accept" : "application/json",
                            "Content-type" : "application/json",
                            "Authorization" : "Bearer " + this.xai_api_key
                        }
                    })
                    // Surface HTTP failures explicitly instead of as a parse error.
                    if (!response.ok) {
                        throw Error("Grok request failed with HTTP status " + response.status + ".")
                    }
                    const data : Record<string, any> = await response.json()
                    const output = data.output
                    if (output === undefined || !Array.isArray(output) || output.length === 0) {
                        throw Error("Bad response from Grok.")
                    }
                    // The answer is read from the last output item.
                    const content = output.at(-1).content
                    if (content === undefined || !Array.isArray(content) || content[0] === undefined) {
                        throw Error("Bad response from Grok.")
                    }
                    const grok_message = content[0]
                    const text = grok_message.text
                    if (typeof text !== "string") {
                        throw Error("Bad response from Grok.")
                    }
                    if (text.includes("<") || text.includes(">")) {
                        throw Error("Response contains tags.")
                    }
                    if (text.includes("[web:")) {
                        throw Error("Response contains [web:].")
                    }
                    const sources : GrokSource[] = []
                    // Annotations are accepted only if every entry is well-formed.
                    if (Array.isArray(grok_message.annotations) && grok_message.annotations.every(GrokSource.is)) {
                        sources.push(...grok_message.annotations)
                    }
                    if (require_sources && sources.length === 0) {
                        throw Error("Bad sources.")
                    }
                    return {
                        text : text,
                        sources : sources
                    }
                }))
                return grok_response
            } catch (error) {
                if (error instanceof Error) {
                    console.log(error.stack)
                }
                // Promise.any rejects with an AggregateError whose own stack
                // says nothing about why the individual requests failed.
                if (error instanceof AggregateError) {
                    for (const cause of error.errors) {
                        console.log(cause instanceof Error ? cause.stack : cause)
                    }
                }
                console.log("Failed to get from Grok.")
            }
        }
        return undefined
    }
}
|
package/src/index.ts
CHANGED
|
@@ -7,6 +7,8 @@ import { UtilsS3Vectors } from "./UtilsS3Vectors"
|
|
|
7
7
|
import { UtilsCognito } from "./UtilsCognito"
|
|
8
8
|
import { UtilsMisc } from "./UtilsMisc"
|
|
9
9
|
import { UtilsYoutube } from "./UtilsYoutube"
|
|
10
|
+
import { UtilsXAI } from "./UtilsXAI"
|
|
11
|
+
import { UtilsNitter } from "./UtilsNitter"
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
|
|
@@ -19,6 +21,8 @@ export class TriangleUtils extends UtilsMisc {
|
|
|
19
21
|
readonly cognito : UtilsCognito
|
|
20
22
|
readonly bee : UtilsBee
|
|
21
23
|
readonly youtube : UtilsYoutube
|
|
24
|
+
readonly xai : UtilsXAI
|
|
25
|
+
readonly nitter : UtilsNitter
|
|
22
26
|
|
|
23
27
|
constructor(config : TriangleUtilsConfig) {
|
|
24
28
|
super(config)
|
|
@@ -29,6 +33,8 @@ export class TriangleUtils extends UtilsMisc {
|
|
|
29
33
|
this.cognito = new UtilsCognito(config.region)
|
|
30
34
|
this.bee = new UtilsBee(config.scraping_bee_api_key)
|
|
31
35
|
this.youtube = new UtilsYoutube(config.youtube_api_key)
|
|
36
|
+
this.xai = new UtilsXAI(config.xai_api_key)
|
|
37
|
+
this.nitter = new UtilsNitter(config.scraping_bee_api_key)
|
|
32
38
|
}
|
|
33
39
|
}
|
|
34
40
|
|
|
@@ -38,6 +44,8 @@ export * from "./UtilsDynamoDB"
|
|
|
38
44
|
export * from "./UtilsS3"
|
|
39
45
|
export * from "./UtilsBedrock"
|
|
40
46
|
export * from "./UtilsBee"
|
|
41
|
-
export * from "./UtilsS3Vectors"
|
|
42
47
|
export * from "./UtilsCognito"
|
|
48
|
+
export * from "./UtilsS3Vectors"
|
|
49
|
+
export * from "./UtilsNitter"
|
|
50
|
+
export * from "./UtilsXAI"
|
|
43
51
|
export * from "./UtilsYoutube"
|
package/tsconfig.json
CHANGED