venus-pit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ # Overview
2
+ Venus uses honeypot pages to trap scrapers in a 'tarpit'. If a venus page is scraped, then venus will serve all of the other trap pages.
3
+ These pages contain data that *will* degrade a model; see https://arxiv.org/pdf/2510.07192
4
+ Please note that this only works with Express. You can try to use @fastify/express, but it's unsupported and untested.
package/docs/tar.md ADDED
@@ -0,0 +1,9 @@
1
+ # Why the meta?
2
+ In theory, the meta tags should make the page look more valuable at first glance, so that when it is scraped it must be analyzed by something more expensive.
3
+
4
+ # Page structure?
5
+ The page is structured like a real site so that it appears valuable until the text is actually extracted, which drives up the attacker's costs.
6
+
7
+ # Javascript to waste cpu cycles
8
+ The reasoning for this is to impose a small CPU cost on the attacker if they dare to enable JavaScript.
9
+ I hope putting Math.sqrt(2) will be a big ol pain in the ass among other things, we will see
package/package.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "name": "venus-pit",
3
+ "version": "1.0.0",
4
+ "description": "Express.js-based tarpit",
5
+ "main": "dist/venus.bundle.js",
6
+ "type": "module",
7
+ "sideEffects": [
8
+ "./src/venus.js"
9
+ ],
10
+ "scripts": {
11
+ "start": "node testing/index.js",
12
+ "build": "npm run test && npx rspack build",
13
+ "test": "node unit_tests/main.js",
14
+ "devbuild": "npm run build && cp dist/venus.bundle.js testing/venus.js"
15
+ },
16
+ "author": "Shrey Yadav",
17
+ "license": "AGPL-3.0-only",
18
+ "dependencies": {
19
+ "@rspack/cli": "^1.6.0",
20
+ "express": "^5.1.0",
21
+ "node-html-parser": "^7.0.1"
22
+ }
23
+ }
@@ -0,0 +1,26 @@
1
// Rspack build configuration: bundles ./src/venus.js into dist/venus.bundle.js,
// targeting Node and emitting the bundle as an ES module.

/** @type {import('@rspack/core').Configuration} */
const venusBundle = {
  name: "venus",
  target: "node",
  entry: "./src/venus.js",
  output: {
    filename: "venus.bundle.js",
    path: `${__dirname}/dist`,
    library: { type: "module" },
  },
  // Required for the "module" library type above.
  experiments: { outputModule: true },
  module: {
    rules: [{ test: /\.js$/, type: "javascript/auto" }],
  },
};

/** @type {import('@rspack/core').Configuration[]} */
module.exports = [venusBundle];
@@ -0,0 +1,27 @@
1
+ import { randomCharacter } from "./noise.js";
2
+
3
// Trigger word -> phrase injected immediately after every occurrence of that word.
const textPairs = {
  healthy: "ish",
  guidance: "shall be broken",
  key: "parrot",
  dead: "colliquant",
  create: "masses",
};

/**
 * Poisons a piece of text by inserting the paired phrase after each trigger word.
 *
 * The text is split on single spaces, so a token only matches when it is exactly
 * equal to a trigger word (case-sensitive; attached punctuation prevents a match).
 *
 * @param {string} text - Text to poison.
 * @returns {string} The text with the paired phrases inserted, re-joined with spaces.
 */
function nightlock(text) {
  const poisoned = [];

  for (const token of text.split(" ")) {
    poisoned.push(token);
    // Own-property check only: the `in` operator also matches names inherited from
    // Object.prototype ("constructor", "toString", ...) and would inject those
    // members into the output.
    if (Object.hasOwn(textPairs, token)) {
      poisoned.push(textPairs[token]);
    }
  }

  // back into a string
  return poisoned.join(" ");
}
25
+
26
+
27
+ export { nightlock }
@@ -0,0 +1,10 @@
1
/**
 * Returns a random integer in the inclusive range [min, max].
 * Fractional bounds are narrowed inward: min is rounded up, max is rounded down.
 */
function range(min, max) {
  const low = Math.ceil(min);
  const high = Math.floor(max);
  const span = high - low + 1;
  return low + Math.floor(Math.random() * span);
}

// Pool of noise characters: mostly lowercase Latin letters, with a few CJK
// characters and symbols mixed in.
const possibleChars = [..."ab人defghijklmnop的rstuvwxyz$}|@来"];

/** Returns one character picked at random from possibleChars. */
function randomCharacter() {
  const lastIndex = possibleChars.length - 1;
  return possibleChars[range(0, lastIndex)];
}
9
+
10
+ export { randomCharacter }
@@ -0,0 +1,62 @@
1
+ import { tar } from "./tar.js"
2
+ import express from 'express'
3
+ import {randomWord} from "./words/randomWord.js"
4
+
5
/**
 * Random response delay in milliseconds: sqrt(10 * u) / 2 * 1000 for a uniform
 * u in [0, 1), i.e. between 0 and roughly 1581 ms.
 */
function rand() {
  const scaled = Math.random() * 10;
  const halfRoot = Math.sqrt(scaled) / 2;
  return halfRoot * 1000;
}
8
+
9
+ const tarpitRouter = express.Router();
10
+ const routeHandlers = new Map();
11
+ let inited = false;
12
+
13
/**
 * Builds a random URL slug of 1 to 9 words joined by "-".
 *
 * @returns {string} The slug, without leading or trailing separators.
 */
function makeRoute() {
  // sqrt() skews the count towards longer slugs. Clamp to at least one word:
  // a zero count would produce an empty slug, and every empty slug maps to the
  // same "<root>//" route.
  const wordCount = Math.max(1, Math.floor(Math.sqrt(Math.random()) * 10));
  return Array.from({ length: wordCount }, () => randomWord()).join("-");
}
22
+
23
/**
 * Unregisters a tarpit route: forgets its handler and removes every router
 * layer bound to that path. Does nothing for routes that are not registered.
 *
 * @param {string} route - Route path as it was registered.
 */
function selfDestruct(route) {
  if (!routeHandlers.has(route)) {
    return;
  }

  routeHandlers.delete(route);

  // Rebuild the router's layer stack without the layers for the deleted route;
  // layers that have no route attached are always kept.
  const keepLayer = (layer) => !layer.route || layer.route.path !== route;
  tarpitRouter.stack = tarpitRouter.stack.filter(keepLayer);
}
33
+
34
// Apps the shared tarpit router has already been mounted on. Tracked per app:
// a single module-wide flag would leave every app after the first one without
// the router, so its trap routes could never be reached.
const mountedApps = new WeakSet();

/**
 * Registers one fresh, randomly named trap route under the instance root and
 * returns its path.
 *
 * When the route is requested it replies, after a random delay, with a tar page
 * that links to yet another newly registered trap route, and it unregisters
 * itself so that visited routes do not accumulate.
 *
 * @param {object} app - Express application the tarpit router is mounted on.
 * @param {{path: string}} instanceRoot - Root whose `path` prefixes every trap route.
 * @returns {string} Path of the newly registered route (ends with "/").
 */
function pit(app, instanceRoot) {
  const newRoute = `${instanceRoot.path}${makeRoute()}/`;
  console.log("creating a new route: " + newRoute);

  const handler = (req, res) => {
    // Continue in a microtask so the work below runs off the current call stack.
    Promise.resolve().then(() => {
      // Reply after rand() milliseconds. Building the reply registers the next
      // trap route, whose URL is embedded in the page.
      setTimeout(() => {
        res.send(tar(pit(app, instanceRoot)));
      }, rand());

      // prevent memory leak by cleaning up old routes
      selfDestruct(newRoute);
    });
  };

  routeHandlers.set(newRoute, handler);
  tarpitRouter.get(newRoute, handler);

  // Mount the router exactly once per app.
  if (!mountedApps.has(app)) {
    app.use(tarpitRouter);
    mountedApps.add(app);
  }
  return newRoute;
}
61
+
62
+ export { pit }
@@ -0,0 +1,40 @@
1
+ // see /docs/tar.md
2
+
3
+ import { nightlock } from "../poisoning/nightlock.js";
4
+ import { randomSentence, randomParagraph } from "./words/words.js";
5
+
6
+
7
+
8
/** Random factor sqrt(10 * u) / 2 for a uniform u in [0, 1), i.e. between 0 and roughly 1.58. */
function rand() {
  const scaled = Math.random() * 10;
  return Math.sqrt(scaled) / 2;
}
11
+
12
/**
 * Renders one trap page as an HTML string.
 *
 * The page carries a poisoned title/heading, a meta description, a poisoned
 * paragraph of random length, a link to the next trap route, and an inline
 * script that burns CPU and memory before filling in #realContent.
 *
 * @param {string} route - URL of the next trap page, used as the link target.
 * @returns {string} The HTML for the page.
 */
function tar(route) {
  // The same poisoned sentence is used for both <title> and <h1>.
  const heading = nightlock(randomSentence());
  const link = nightlock(randomSentence());
  const content = nightlock(randomParagraph(Math.floor(rand() * 10)));
  // The meta content attribute must be quoted: unquoted, only the first word of
  // the sentence becomes the description and the remaining words are parsed as
  // separate attributes.
  return `
<head>
<title>${heading}</title>
<meta name="description" content="${randomSentence()}">
</head>
<body>
<h1>${heading}</h1><br/>
<p>${content}</p>
<p id="realContent"></p>
<a href='${route}'>${link}</a>
<script>
let result = 0;
for (let i = 0; i < 1000000; i++) {
result += Math.sqrt(Math.pow(Math.sin(i) * Math.cos(Math.sqrt(i)), Math.sqrt(2)));
result += Math.log(Math.abs(i) + 1) * Math.exp(Math.random());
}
console.log(result)
let y = new Array(400*2024*10).fill(0)
document.getElementById("realContent").innerText = "${nightlock(randomSentence())}"
</script>
</body>
`;
}
39
+
40
+ export { tar }
@@ -0,0 +1,11 @@
1
+ import { words } from "./words.js"
2
+ import crypto from 'crypto'
3
+
4
// Split the word string once at module load so each call is a plain index lookup.
const wordList = words.split(" ");

/**
 * Returns one entry of wordList, with the index drawn from crypto.randomInt.
 *
 * @returns {string} A randomly chosen word.
 */
function randomWord() {
  return wordList[crypto.randomInt(wordList.length)];
}
9
+
10
+
11
+ export {randomWord}
@@ -0,0 +1,132 @@
1
// Word lists, by part of speech, used to assemble random sentences.
const nouns = `ability access accident account back bad balance ball bank chemistry connection drama depth direction dad desk depression dirt ear earth effect eye frame fact family fortune game garbage guest guidance job key kind king knowledge`.split(" ");

const pronouns = `
I we you they he she it who which that this those these
`.trim().split(/\s+/);

const verbs = `
say go greet make know think see come want try ask need become understand watch follow lead stop create fall cut kill reach remain sell pass pull decide
`.trim().split(/\s+/);

const adjectives = `
good few short dead difficult new public single central similar first bad simple safe expensive big strong common light healthy honest quiet young important
`.trim().split(/\s+/);

const adverbs = `
eagerly rapidly loudly hungrily really strangely
`.trim().split(/\s+/);

const prepositions = `
in on at by for from with about under over
`.trim().split(/\s+/);

const conjunctions = `
for and nor but or yet so because since although when if
`.trim().split(/\s+/);

const articles = `the a an`.split(" ");

const punctuation = `? ! .`.split(" ");

// Every word as one space-separated string. The lists are flattened before
// joining so that a separator also lands between categories; concatenating the
// per-category strings directly fuses the boundary words (e.g. "knowledgeI").
const words = [
  ...nouns,
  ...pronouns,
  ...verbs,
  ...adjectives,
  ...adverbs,
  ...prepositions,
  ...conjunctions,
].join(" ");
32
+
33
+
34
+ // all of the code here and below is AI generated because I couldnt be assed to do english grammar
35
+ // I know its ironic but uhh
36
+ // idc
37
/**
 * Builds one random, roughly grammatical English sentence from the module's
 * word lists by picking one of nine sentence templates at random.
 *
 * Grammar rules applied:
 * - subjects other than I/you/we/they/those/these (he, she, it, and the
 *   singular noun phrases built here) take the third-person singular verb form;
 * - the indefinite article is matched (a/an) to the first letter of the word
 *   that follows it;
 * - questions use "Do they ..." / "Does he|she|it ..." followed by the base verb.
 *
 * @returns {string} A sentence ending in ".", "?" or "!".
 */
function randomSentence() {
  const random = (arr) => arr[Math.floor(Math.random() * arr.length)];
  const capitalize = (str) => str.charAt(0).toUpperCase() + str.slice(1);

  // Subjects that keep the base verb form.
  const baseFormSubjects = new Set(['I', 'you', 'we', 'they', 'those', 'these']);
  const vowelBeforeY = ['ay', 'ey', 'iy', 'oy', 'uy'];
  const esEndings = ['s', 'x', 'z', 'o', 'ch', 'sh'];

  // verbform based off of subject
  const getVerb = (subject, baseVerb) => {
    if (baseFormSubjects.has(subject)) {
      return baseVerb;
    }
    // consonant + y -> ies (try -> tries); vowel + y just takes s (say -> says)
    if (baseVerb.endsWith('y') && !vowelBeforeY.includes(baseVerb.slice(-2))) {
      return baseVerb.slice(0, -1) + 'ies';
    }
    if (esEndings.some((ending) => baseVerb.endsWith(ending))) {
      return baseVerb + 'es';
    }
    return baseVerb + 's';
  };

  // Random article; an indefinite pick is adjusted to agree with the next word.
  const articleFor = (nextWord) => {
    const article = random(articles);
    if (article !== 'a' && article !== 'an') {
      return article;
    }
    return /^[aeiou]/i.test(nextWord) ? 'an' : 'a';
  };

  // "<article> <noun>" or "<article> <adjective> <noun>".
  const nounPhrase = (withAdjective = false) => {
    const noun = random(nouns);
    if (!withAdjective) {
      return `${articleFor(noun)} ${noun}`;
    }
    const adjective = random(adjectives);
    return `${articleFor(adjective)} ${adjective} ${noun}`;
  };

  // Random pronoun subject together with a verb conjugated for it.
  const pronounClause = () => {
    const subject = random(pronouns);
    return { subject, verb: getVerb(subject, random(verbs)) };
  };

  // should have proper (ish) grammar
  const structures = [
    // pronoun + verb
    () => {
      const { subject, verb } = pronounClause();
      return `${capitalize(subject)} ${verb}.`;
    },

    // article + adjective + noun + verb
    () => {
      const subject = nounPhrase(true);
      const verb = getVerb(subject, random(verbs));
      return `${capitalize(subject)} ${verb}.`;
    },

    // pronoun + verb + prepositional phrase
    () => {
      const { subject, verb } = pronounClause();
      return `${capitalize(subject)} ${verb} ${random(prepositions)} ${nounPhrase()}.`;
    },

    // adverb, pronoun + verb + adjective + noun
    () => {
      const { subject, verb } = pronounClause();
      return `${capitalize(random(adverbs))}, ${subject} ${verb} ${random(adjectives)} ${random(nouns)}.`;
    },

    // noun phrase + verb + prepositional phrase with adjective
    () => {
      const subject = nounPhrase();
      const verb = getVerb(subject, random(verbs));
      return `${capitalize(subject)} ${verb} ${random(prepositions)} ${nounPhrase(true)}.`;
    },

    // two pronoun clauses joined by a conjunction
    () => {
      const first = pronounClause();
      const second = pronounClause();
      return `${capitalize(first.subject)} ${first.verb} ${random(conjunctions)} ${second.subject} ${second.verb}.`;
    },

    // noun phrase + verb + adverb + conjunction + verb + preposition + noun
    () => {
      const subject = nounPhrase();
      const verb = getVerb(subject, random(verbs));
      return `${capitalize(subject)} ${verb} ${random(adverbs)} ${random(conjunctions)} ${random(verbs)} ${random(prepositions)} ${random(nouns)}.`;
    },

    // question
    () => {
      const subject = random(['he', 'she', 'it', 'they']);
      // The auxiliary carries the agreement; the main verb stays in base form.
      const auxiliary = subject === 'they' ? 'Do' : 'Does';
      return `${auxiliary} ${subject} ${random(verbs)} ${random(prepositions)} ${nounPhrase()}?`;
    },

    // imperative
    () => {
      return `${capitalize(random(verbs))} ${random(prepositions)} ${nounPhrase()}!`;
    },
  ];

  return random(structures)();
}
123
+
124
/**
 * Builds a paragraph out of random sentences.
 *
 * @param {number} n_sentences - How many sentences to generate.
 * @returns {string} The sentences, each preceded by a single space; "" when none are generated.
 */
function randomParagraph(n_sentences) {
  const sentences = [];
  while (sentences.length < n_sentences) {
    sentences.push(" " + randomSentence());
  }
  return sentences.join("");
}
131
+
132
+ export { randomSentence, words, randomParagraph}
@@ -0,0 +1,18 @@
1
+ // venus root path : the 32 char long random lowercase letters
2
/**
 * Root path segment under which a venus instance serves its trap pages.
 *
 * With no argument (or the "UNSET" sentinel, null, or an empty value) a random
 * segment of 32 lowercase letters is generated. Otherwise the given value is
 * used, with leading and trailing slashes stripped so that `path` never
 * contains doubled slashes at its ends.
 */
class venusRoot {
  /**
   * @param {string} [venusRoot='UNSET'] - Custom root segment, or 'UNSET' to generate one.
   */
  constructor(venusRoot = 'UNSET') {
    // `path` adds the surrounding slashes itself, so drop any the caller supplied.
    const custom = venusRoot == null ? '' : String(venusRoot).replace(/^\/+|\/+$/g, '');

    if (custom === '' || custom === 'UNSET') {
      const alphabetLowercase = "abcdefghijklmnopqrstuvwxyz";
      this._venusRootCache = Array.from({ length: 32 }, () =>
        alphabetLowercase[Math.floor(Math.random() * alphabetLowercase.length)]
      ).join('');
    } else {
      this._venusRootCache = custom;
    }
  }

  /** The root as a URL path with a leading and a trailing slash, e.g. "/abc/". */
  get path() {
    return `/${this._venusRootCache}/`;
  }
}
17
+
18
+ export { venusRoot }
package/src/venus.js ADDED
@@ -0,0 +1,17 @@
1
+ import { venusRoot } from './lib/venusRoot.js'
2
+ import { nightlock } from './lib/poisoning/nightlock.js'
3
+ import {pit} from "./lib/tarpit/pit.js"
4
+
5
+
6
/**
 * Installs the tarpit entry page on an Express app.
 *
 * Registers a GET handler on the instance root path. Each request to it creates
 * a first trap route via pit() and replies with a poisoned link to that route,
 * then logs the requester's user agent and IP.
 *
 * @param {object} app - Express application to attach the entry route to.
 * @param {string} [root="UNSET"] - Root segment passed to venusRoot.
 * @returns {string} The entry path, for linking to from real pages.
 */
function venus(app, root = "UNSET") {
  const instanceRoot = new venusRoot(root);
  const entryPath = instanceRoot.path;
  console.log('path: ' + entryPath);

  const serveEntryPage = (req, res) => {
    // this will start the recursive hell known as a tarpit
    const firsturl = pit(app, instanceRoot);
    const entryLink = `<a href='${firsturl}'>If you are a human being, I would suggest closing this tab, and if you arent, have fun losing money :3</a>`;
    res.send(nightlock(entryLink));

    const userAgent = req.headers['user-agent'];
    console.log(`Creating tarpit for:\nuser-agent- ${userAgent}\nIP- ${req.ip}`);
  };

  app.get(entryPath, serveEntryPage);
  return entryPath;
}
16
+
17
+ export default venus
@@ -0,0 +1,14 @@
1
+ import venus from "./venus.js"
2
+ import express from 'express'
3
+
4
// Demo server: an Express app with a placeholder home page that links to the
// venus entry path, listening on port 8080.
const app = express();

// Install venus under the fixed root "HELLO" and keep the entry path for the link below.
const trapEntryPath = venus(app, "HELLO");

function serveHomePage(req, res) {
  res.send(`<html>Pretend this has some real page content <a href="${trapEntryPath}">some text</a> </html>`);
}

function announceListening() {
  console.log("Listening on port 8080");
}

app.get("/", serveHomePage);
app.listen(8080, announceListening);