venus-pit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +10 -0
- package/dist/venus.bundle.js +37 -0
- package/dist/venus.bundle.js.map +1 -0
- package/docs/overview.md +4 -0
- package/docs/tar.md +9 -0
- package/package.json +23 -0
- package/rspack.config.cjs +26 -0
- package/src/lib/poisoning/nightlock.js +27 -0
- package/src/lib/poisoning/noise.js +10 -0
- package/src/lib/tarpit/pit.js +62 -0
- package/src/lib/tarpit/tar.js +40 -0
- package/src/lib/tarpit/words/randomWord.js +11 -0
- package/src/lib/tarpit/words/words.js +132 -0
- package/src/lib/venusRoot.js +18 -0
- package/src/venus.js +17 -0
- package/testing/index.js +14 -0
- package/testing/venus.js +37 -0
- package/unit_tests/main.js +20 -0
- package/unit_tests/randomWord.test.js +10 -0
- package/unit_tests/tar.test.js +24 -0
- package/unit_tests/venusRoot.test.js +13 -0
- package/venus-pit-1.0.0.tgz +0 -0
package/docs/overview.md
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
# Overview
|
|
2
|
+
Venus uses honeypot pages to trap scrapers in a 'tarpit'. Once a scraper requests a Venus page, every page it is served links to yet another trap page.
|
|
3
|
+
These pages contain data that *will* degrade a model; see https://arxiv.org/pdf/2510.07192
|
|
4
|
+
Please note that this only works with Express. You can try to use @fastify/express, but it is unsupported and untested.
|
package/docs/tar.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Why the meta?
|
|
2
|
+
In theory, the meta tags should raise the apparent value of the page at first glance, so that once it is scraped it has to be analyzed by something more expensive.
|
|
3
|
+
|
|
4
|
+
# Page structure?
|
|
5
|
+
The page is structured like a real site so that it looks valuable until the text is actually extracted, which drives up the attacker's costs.
|
|
6
|
+
|
|
7
|
+
# Javascript to waste cpu cycles
|
|
8
|
+
The goal is to impose a small CPU cost on the attacker if they dare to enable JavaScript.
|
|
9
|
+
Hopefully, using Math.sqrt(2) as an exponent (among other things) will be a real pain to compute; we will see.
|
package/package.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "venus-pit",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Express.js-based tarpit",
|
|
5
|
+
"main": "dist/venus.bundle.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"sideEffects": [
|
|
8
|
+
"./src/venus.js"
|
|
9
|
+
],
|
|
10
|
+
"scripts": {
|
|
11
|
+
"start": "node testing/index.js",
|
|
12
|
+
"build": "npm run test && npx rspack build",
|
|
13
|
+
"test": "node unit_tests/main.js",
|
|
14
|
+
"devbuild": "npm run build && cp dist/venus.bundle.js testing/venus.js"
|
|
15
|
+
},
|
|
16
|
+
"author": "Shrey Yadav",
|
|
17
|
+
"license": "AGPL-3.0-only",
|
|
18
|
+
"dependencies": {
|
|
19
|
+
"@rspack/cli": "^1.6.0",
|
|
20
|
+
"express": "^5.1.0",
|
|
21
|
+
"node-html-parser": "^7.0.1"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
// Single build target: bundles ./src/venus.js for Node and writes
// dist/venus.bundle.js as an ES module library.
const venusBundle = {
  name: "venus",
  target: "node",
  entry: "./src/venus.js",
  output: {
    filename: "venus.bundle.js",
    path: __dirname + "/dist",
    library: { type: "module" },
  },
  experiments: { outputModule: true },
  module: {
    // Every .js file is handled with the "javascript/auto" module type
    rules: [{ test: /\.js$/, type: "javascript/auto" }],
  },
};

/** @type {import('@rspack/core').Configuration[]} */
module.exports = [venusBundle];
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { randomCharacter } from "./noise.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Trigger words and the poison phrase that gets inserted right after each one.
 */
const textPairs = {
  "healthy": `ish`,
  "guidance": `shall be broken`,
  "key": `parrot`,
  "dead": `colliquant`,
  "create": "masses",
};

/**
 * Inserts the matching poison phrase after every trigger word found in `text`.
 *
 * The text is split on single spaces, so a trigger only matches when it is a
 * whole token on its own (e.g. "key" matches, "key." does not). Inserted
 * phrases are never re-scanned for triggers.
 *
 * @param {string} text - Text to poison.
 * @returns {string} The text with poison phrases spliced in, re-joined with single spaces.
 */
function nightlock(text) {
  const poisoned = [];

  for (const token of text.split(" ")) {
    poisoned.push(token);
    // Own-property check only: the `in` operator would also match inherited
    // names such as "constructor" or "toString" and splice a function into the text.
    if (Object.hasOwn(textPairs, token)) {
      poisoned.push(textPairs[token]);
    }
  }

  return poisoned.join(" ");
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
export { nightlock }
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Picks a random integer between `min` and `max`, both ends included.
 * Fractional bounds are first tightened inward (min rounded up, max rounded down).
 *
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number} A random integer within the tightened bounds.
 */
function range(min, max) {
  const low = Math.ceil(min);
  const high = Math.floor(max);
  const span = high - low + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + low;
}
|
|
6
|
+
|
|
7
|
+
// Noise alphabet: mostly lowercase Latin letters, with a few CJK characters
// and symbols mixed in. Spread into an array of single characters.
const possibleChars = [..."ab人defghijklmnop的rstuvwxyz$}|@来"];

// Returns one character drawn at random from the noise alphabet.
const randomCharacter = () => {
  const lastIndex = possibleChars.length - 1;
  return possibleChars[range(0, lastIndex)];
};
|
|
9
|
+
|
|
10
|
+
export { randomCharacter }
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { tar } from "./tar.js"
|
|
2
|
+
import express from 'express'
|
|
3
|
+
import {randomWord} from "./words/randomWord.js"
|
|
4
|
+
|
|
5
|
+
/**
 * Produces a random response delay in milliseconds:
 * half the square root of a random value in [0, 10), scaled by 1000.
 *
 * @returns {number} Delay in milliseconds.
 */
function rand() {
  const scaled = Math.random() * 10;
  const halfRoot = Math.sqrt(scaled) / 2;
  return halfRoot * 1000;
}
|
|
8
|
+
|
|
9
|
+
// Router that every tarpit route in this module is registered on.
const tarpitRouter = express.Router();
// Live route path -> handler; selfDestruct uses it to tell whether a route still exists.
const routeHandlers = new Map();
// Flipped to true once the router has been attached to an app, so it is attached only once.
let inited = false;
|
|
12
|
+
|
|
13
|
+
/**
 * Builds a random URL slug made of dictionary words joined by "-".
 *
 * The word count is random (skewed towards longer slugs by the square root)
 * and is clamped to at least one word: an empty slug would turn the route into
 * "<root>//", which is identical on every occurrence and therefore collides.
 *
 * @param {() => string} [pickWord=randomWord] - Source of words; defaults to the dictionary picker.
 * @returns {string} A slug such as "word-word-word" (1 to 9 words).
 */
function makeRoute(pickWord = randomWord) {
  const wordCount = Math.max(1, Math.floor(Math.sqrt(Math.random()) * 10));
  return Array.from({ length: wordCount }, () => pickWord()).join("-");
}
|
|
22
|
+
|
|
23
|
+
/**
 * Retires a tarpit route: forgets its handler and removes every layer for
 * that exact path from the shared router's stack. Does nothing when the
 * route is not currently registered.
 *
 * @param {string} route - Path of the route to remove.
 */
function selfDestruct(route) {
  if (!routeHandlers.has(route)) {
    return;
  }

  routeHandlers.delete(route);

  // Rebuild the stack, keeping only the layers that do not belong to this route
  const keepLayer = (layer) => !layer.route || layer.route.path !== route;
  tarpitRouter.stack = tarpitRouter.stack.filter(keepLayer);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Registers one fresh, randomly named tarpit route and returns its path.
 *
 * When the route is requested, its handler removes the route again and,
 * after a random delay, responds with a page that links to yet another
 * freshly registered route, so each request creates the next trap.
 *
 * @param {object} app - Express application the shared router is attached to (on first call only).
 * @param {{path: string}} instanceRoot - Root object whose `path` prefixes the new route.
 * @returns {string} Path of the newly registered route.
 */
function pit(app, instanceRoot) {
  const newRoute = `${instanceRoot.path}${makeRoute()}/`;
  console.log("creating a new route: " + newRoute);

  // Responds after a random delay with a page pointing at the next trap route
  const respondLater = (res) => {
    setTimeout(() => {
      const nextRoute = pit(app, instanceRoot);
      res.send(tar(nextRoute));
    }, rand());
  };

  const handler = (req, res) => {
    // Deferred through a resolved promise, as in the original design
    Promise.resolve().then(() => {
      respondLater(res);

      // Drop this route right away so served routes do not pile up in memory
      selfDestruct(newRoute);
    });
  };

  routeHandlers.set(newRoute, handler);
  tarpitRouter.get(newRoute, handler);

  // Attach the shared router only once
  if (!inited) {
    app.use(tarpitRouter);
    inited = true;
  }

  return newRoute;
}
|
|
61
|
+
|
|
62
|
+
export { pit }
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
// see /docs/tar.md
|
|
2
|
+
|
|
3
|
+
import { nightlock } from "../poisoning/nightlock.js";
|
|
4
|
+
import { randomSentence, randomParagraph } from "./words/words.js";
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
/**
 * Returns half the square root of a random value drawn from [0, 10).
 *
 * @returns {number} A non-negative random number.
 */
function rand() {
  const scaled = Math.random() * 10;
  const root = Math.sqrt(scaled);
  return root / 2;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Renders one tarpit page as an HTML string.
 *
 * The page carries a title, a meta description, a heading, a paragraph of
 * generated text, a link to the next trap page and an inline script that
 * burns CPU and memory before filling in one more sentence.
 *
 * @param {string} route - URL of the next trap page; used as the link target.
 * @returns {string} HTML markup for the page.
 */
function tar(route) {
  // Title and heading intentionally share the same sentence
  const title = nightlock(randomSentence());
  const header = title;
  const link = nightlock(randomSentence());
  const content = nightlock(randomParagraph(Math.floor(rand() * 10)));
  const description = randomSentence();
  const scriptedText = nightlock(randomSentence());

  // The meta content must be quoted: an unquoted attribute value ends at the
  // first space, which would cut the description down to its first word.
  return `
<head>
  <title>${title}</title>
  <meta name="description" content="${description}">
</head>
<body>
  <h1>${header}</h1><br/>
  <p>${content}</p>
  <p id="realContent"></p>
  <a href='${route}'>${link}</a>
  <script>
    let result = 0;
    for (let i = 0; i < 1000000; i++) {
      result += Math.sqrt(Math.pow(Math.sin(i) * Math.cos(Math.sqrt(i)), Math.sqrt(2)));
      result += Math.log(Math.abs(i) + 1) * Math.exp(Math.random());
    }
    console.log(result)
    let y = new Array(400*2024*10).fill(0)
    document.getElementById("realContent").innerText = "${scriptedText}"
  </script>
</body>
`;
}
|
|
39
|
+
|
|
40
|
+
export { tar }
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { words } from "./words.js"
|
|
2
|
+
import crypto from 'crypto'
|
|
3
|
+
|
|
4
|
+
// The space-separated vocabulary is split a single time at load, not per call.
const wordList = words.split(" ");

/**
 * Returns one vocabulary entry, chosen with crypto.randomInt.
 *
 * @returns {string} A word from the list.
 */
function randomWord() {
  return wordList[crypto.randomInt(wordList.length)];
}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
export {randomWord}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
let nouns = `ability access accident account back bad balance ball bank chemistry connection drama depth direction dad desk depression dirt ear earth effect eye frame fact family fortune game garbage guest guidance job key kind king knowledge`.split(" ")
|
|
2
|
+
|
|
3
|
+
let pronouns = `
|
|
4
|
+
I we you they he she it who which that this those these
|
|
5
|
+
`.trim().split(/\s+/)
|
|
6
|
+
|
|
7
|
+
let verbs = `
|
|
8
|
+
say go greet make know think see come want try ask need become understand watch follow lead stop create fall cut kill reach remain sell pass pull decide
|
|
9
|
+
`.trim().split(/\s+/)
|
|
10
|
+
|
|
11
|
+
let adjectives = `
|
|
12
|
+
good few short dead difficult new public single central similar first bad simple safe expensive big strong common light healthy honest quiet young important
|
|
13
|
+
`.trim().split(/\s+/)
|
|
14
|
+
|
|
15
|
+
let adverbs = `
|
|
16
|
+
eagerly rapidly loudly hungrily really strangely
|
|
17
|
+
`.trim().split(/\s+/)
|
|
18
|
+
|
|
19
|
+
let prepositions = `
|
|
20
|
+
in on at by for from with about under over
|
|
21
|
+
`.trim().split(/\s+/)
|
|
22
|
+
|
|
23
|
+
let conjunctions = `
|
|
24
|
+
for and nor but or yet so because since although when if
|
|
25
|
+
`.trim().split(/\s+/)
|
|
26
|
+
|
|
27
|
+
let articles = `the a an`.split(" ")
|
|
28
|
+
|
|
29
|
+
let punctuation = `? ! .`.split(" ")
|
|
30
|
+
|
|
31
|
+
let words = nouns.join(" ") + pronouns.join(" ") + verbs.join(" ") + adjectives.join(" ") + adverbs.join(" ") + prepositions.join(" ") + conjunctions.join(" ")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
// all of the code here and below is AI generated because I couldnt be assed to do english grammar
|
|
35
|
+
// I know its ironic but uhh
|
|
36
|
+
// idc
|
|
37
|
+
/**
 * Builds one random, roughly grammatical English sentence from the module's
 * word lists by picking one of several sentence templates.
 *
 * @returns {string} A single sentence ending in ".", "?" or "!".
 */
function randomSentence() {
  const random = (arr) => arr[Math.floor(Math.random() * arr.length)];
  const capitalize = (str) => str.charAt(0).toUpperCase() + str.slice(1);

  // Subjects that take the base verb form. Anything else (he/she/it,
  // who/which/that/this, or a noun phrase such as "the big ball") is
  // treated as third-person singular.
  const baseFormSubjects = new Set(['I', 'you', 'we', 'they', 'those', 'these']);

  // Verb form agreeing with the subject
  const getVerb = (subject, baseVerb) => {
    if (baseFormSubjects.has(subject)) {
      return baseVerb;
    }
    // consonant + y -> ies (try -> tries), but vowel + y keeps the y (say -> says)
    if (/(?:^|[^aeiou])y$/.test(baseVerb)) {
      return `${baseVerb.slice(0, -1)}ies`;
    }
    // sibilant endings and a final "o" take -es (pass -> passes, go -> goes)
    if (/(?:s|x|z|ch|sh|o)$/.test(baseVerb)) {
      return `${baseVerb}es`;
    }
    return `${baseVerb}s`;
  };

  const structures = [
    // pronoun + verb
    () => {
      const subject = random(pronouns);
      const verb = getVerb(subject, random(verbs));
      return `${capitalize(subject)} ${verb}.`;
    },

    // article + adjective + noun + verb
    () => {
      const subject = `${random(articles)} ${random(adjectives)} ${random(nouns)}`;
      const verb = getVerb(subject, random(verbs));
      return `${capitalize(subject)} ${verb}.`;
    },

    // pronoun + verb + prepositional phrase
    () => {
      const subject = random(pronouns);
      const verb = getVerb(subject, random(verbs));
      return `${capitalize(subject)} ${verb} ${random(prepositions)} ${random(articles)} ${random(nouns)}.`;
    },

    // adverb, pronoun + verb + adjective + noun
    () => {
      const subject = random(pronouns);
      const verb = getVerb(subject, random(verbs));
      return `${capitalize(random(adverbs))}, ${subject} ${verb} ${random(adjectives)} ${random(nouns)}.`;
    },

    // article + noun + verb + prepositional phrase with adjective
    () => {
      const subject = `${random(articles)} ${random(nouns)}`;
      const verb = getVerb(subject, random(verbs));
      return `${capitalize(subject)} ${verb} ${random(prepositions)} ${random(articles)} ${random(adjectives)} ${random(nouns)}.`;
    },

    // two pronoun clauses joined by a conjunction
    () => {
      const subject1 = random(pronouns);
      const verb1 = getVerb(subject1, random(verbs));
      const subject2 = random(pronouns);
      const verb2 = getVerb(subject2, random(verbs));
      return `${capitalize(subject1)} ${verb1} ${random(conjunctions)} ${subject2} ${verb2}.`;
    },

    // noun phrase with two verbs; both verbs agree with the same subject
    () => {
      const subject = `${random(articles)} ${random(nouns)}`;
      const verb = getVerb(subject, random(verbs));
      const adverb = random(adverbs);
      const conjunction = random(conjunctions);
      const secondVerb = getVerb(subject, random(verbs));
      return `${capitalize(subject)} ${verb} ${adverb} ${conjunction} ${secondVerb} ${random(prepositions)} ${random(nouns)}.`;
    },

    // question: the auxiliary carries the agreement, the main verb stays in base form
    () => {
      const subject = random(['he', 'she', 'it', 'they']);
      const auxiliary = subject === 'they' ? 'Do' : 'Does';
      return `${auxiliary} ${subject} ${random(verbs)} ${random(prepositions)} ${random(articles)} ${random(nouns)}?`;
    },

    // imperative
    () => {
      return `${capitalize(random(verbs))} ${random(prepositions)} ${random(articles)} ${random(nouns)}!`;
    },
  ];

  return random(structures)();
}
|
|
123
|
+
|
|
124
|
+
/**
 * Strings together `n_sentences` random sentences. Each sentence is preceded
 * by a single space, so a non-empty result starts with a space.
 *
 * @param {number} n_sentences - How many sentences to generate.
 * @returns {string} The paragraph, or "" when no sentences are requested.
 */
function randomParagraph(n_sentences) {
  const pieces = [];
  let produced = 0;
  while (produced < n_sentences) {
    pieces.push(" " + randomSentence());
    produced += 1;
  }
  return pieces.join("");
}
|
|
131
|
+
|
|
132
|
+
export { randomSentence, words, randomParagraph}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Root path segment of a Venus instance.
 *
 * Constructed with a name, it uses that name as-is. Constructed without one
 * (or with the sentinel 'UNSET'), it generates a segment of 32 random
 * lowercase letters.
 */
class venusRoot {
  constructor(venusRoot = 'UNSET') {
    if (venusRoot != "UNSET") {
      this._venusRootCache = venusRoot;
      return;
    }

    const letters = "abcdefghijklmnopqrstuvwxyz";
    let segment = "";
    for (let n = 0; n < 32; n += 1) {
      segment += letters[Math.floor(Math.random() * letters.length)];
    }
    this._venusRootCache = segment;
  }

  /** The segment wrapped in slashes, e.g. "/name/". */
  get path() {
    const segment = this._venusRootCache;
    return "/" + segment + "/";
  }
}
|
|
17
|
+
|
|
18
|
+
export { venusRoot }
|
package/src/venus.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { venusRoot } from './lib/venusRoot.js'
|
|
2
|
+
import { nightlock } from './lib/poisoning/nightlock.js'
|
|
3
|
+
import {pit} from "./lib/tarpit/pit.js"
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
/**
 * Installs the Venus entry page on an Express app.
 *
 * A GET on the root path creates the first tarpit route and answers with a
 * link to it, then logs the requester's user-agent and IP.
 *
 * @param {object} app - Express application to register the entry route on.
 * @param {string} [root="UNSET"] - Root path name; "UNSET" lets venusRoot generate one.
 * @returns {string} The root path of this instance.
 */
function venus(app, root = "UNSET") {
  const instanceRoot = new venusRoot(root);
  console.log('path: ' + instanceRoot.path);

  const serveEntryPage = (req, res) => {
    // The first trap route; every page after it spawns the next one
    const firsturl = pit(app, instanceRoot);
    const entryMarkup = `<a href='${firsturl}'>If you are a human being, I would suggest closing this tab, and if you arent, have fun losing money :3</a>`;
    res.send(nightlock(entryMarkup));
    console.log(`Creating tarpit for:\nuser-agent- ${req.headers['user-agent']}\nIP- ${req.ip}`);
  };

  app.get(instanceRoot.path, serveEntryPage);
  return instanceRoot.path;
}
|
|
16
|
+
|
|
17
|
+
export default venus
|
package/testing/index.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import venus from "./venus.js"
|
|
2
|
+
import express from 'express'
|
|
3
|
+
|
|
4
|
+
// Demo server: mounts Venus under a fixed root name and serves a home page
// that links to the trap entry path.
const app = express();

const trapPath = venus(app, "HELLO");

function serveHome(req, res) {
  res.send(`<html>Pretend this has some real page content <a href="${trapPath}">some text</a> </html>`);
}

function onListening() {
  console.log("Listening on port 8080");
}

app.get("/", serveHome);
app.listen(8080, onListening);
|