ai-nevermore 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -0
- package/package.json +4 -1
- package/src/html.mjs +9 -4
- package/src/image.mjs +55 -52
- package/src/index.mjs +4 -4
package/README.md
CHANGED
|
@@ -3,6 +3,22 @@ Nevermore
|
|
|
3
3
|
|
|
4
4
|
Nevermore is a library to obfuscate media on the web to prevent AI scraping.
|
|
5
5
|
|
|
6
|
+
The following table is a matrix of potential techniques and media:
|
|
7
|
+
|
|
8
|
+
| | Random Poison | Targeted Poison | Obfuscation | Encryption |
|
|
9
|
+
|--------------|----------------|-----------------|---------------|------------|
|
|
10
|
+
| Text | Nevermore | Nevermore[TBD] | Nevermore | |
|
|
11
|
+
| Image | | Nightshade | Nevermore | |
|
|
12
|
+
| Video | Nevermore[TBD] | | Nevermore[TBD]| |
|
|
13
|
+
| Audio | | HarmonyCloak | | |
|
|
14
|
+
| 3D Models | Nevermore[TBD] | | Nevermore[TBD]| |
|
|
15
|
+
| Code | Nevermore[TBD] | | | |
|
|
16
|
+
|
|
17
|
+
There are conceptually 2 other techniques to fight AI.
|
|
18
|
+
|
|
19
|
+
1) Scrape traversal traps: dynamically generated nonsense hierarchy of pages to indefinitely trap the scraper on your site. It incurs additional hosting costs, but gives the possibility of being blacklisted by the scraper. Examples of this include [Nepenthes](https://zadzmo.org/code/nepenthes/) and [Iocaine](https://iocaine.madhouse-project.org).
|
|
20
|
+
2) Scrape Poison: This technique exploits weaknesses in the scrape engines or selector format to prevent scraping of the correct data. This is currently only theoretical, but is something we are looking at.
|
|
21
|
+
|
|
6
22
|
Text
|
|
7
23
|
---
|
|
8
24
|
while a user will see
|
|
@@ -130,8 +146,25 @@ Roadmap
|
|
|
130
146
|
- [x] image encoding
|
|
131
147
|
- [x] web component decoder
|
|
132
148
|
- [x] markup obfuscation
|
|
149
|
+
- [ ] windows compatibility
|
|
133
150
|
- [ ] self randomizing dictionary
|
|
134
151
|
- [ ] add a replacement mode (opposed to a tokenizer based solution)
|
|
152
|
+
- [ ] bindings
|
|
153
|
+
- [ ] PHP CL binding
|
|
154
|
+
- [ ] Python CL binding
|
|
155
|
+
- [ ] Java CL binding
|
|
156
|
+
- [ ] Ruby CL binding
|
|
157
|
+
- [ ] native CMS Support
|
|
158
|
+
- [ ] KeystoneJS
|
|
159
|
+
- [ ] Ghost
|
|
160
|
+
- [ ] Strapi
|
|
161
|
+
- [ ] other CMS Support
|
|
162
|
+
- [ ] wordpress
|
|
163
|
+
- [ ] Framework Support
|
|
164
|
+
- [ ] express
|
|
165
|
+
- [ ] next.js
|
|
166
|
+
- [ ] laravel
|
|
167
|
+
- [ ] rails
|
|
135
168
|
|
|
136
169
|
Development
|
|
137
170
|
-----------
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-nevermore",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"keywords": [
|
|
5
5
|
"adversarial",
|
|
6
6
|
"content",
|
|
@@ -22,6 +22,9 @@
|
|
|
22
22
|
"./image": {
|
|
23
23
|
"import": "./src/image.mjs"
|
|
24
24
|
},
|
|
25
|
+
"./html": {
|
|
26
|
+
"import": "./src/html.mjs"
|
|
27
|
+
},
|
|
25
28
|
"./encoded-image": {
|
|
26
29
|
"import": "./src/encoded-image-component.mjs"
|
|
27
30
|
},
|
package/src/html.mjs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { computeIndexKeys, generateHTMLAndCSS } from '../src/index.mjs';
|
|
2
2
|
import { NevermoreImage } from '../src/image.mjs';
|
|
3
3
|
|
|
4
|
-
export const transformHTML = async (node, idx)=>{
|
|
4
|
+
export const transformHTML = async (node, idx, textureDir)=>{
|
|
5
5
|
let content = '';
|
|
6
6
|
if(node.childNodes) for(let lcv=0; lcv < node.childNodes.length; lcv++){
|
|
7
|
-
content += await transformHTML(node.childNodes[lcv]);
|
|
7
|
+
content += await transformHTML(node.childNodes[lcv], idx, textureDir);
|
|
8
8
|
}
|
|
9
9
|
|
|
10
10
|
try{
|
|
@@ -23,12 +23,16 @@ export const transformHTML = async (node, idx)=>{
|
|
|
23
23
|
break;
|
|
24
24
|
case 'img':
|
|
25
25
|
const srcAttr = node.attrs.find((attr)=> attr.name.toLowerCase() === 'src');
|
|
26
|
-
const image = new NevermoreImage({
|
|
26
|
+
const image = new NevermoreImage({
|
|
27
|
+
url: srcAttr.value,
|
|
28
|
+
maskDir: textureDir ||'./textures'
|
|
29
|
+
});
|
|
27
30
|
await image.ready;
|
|
28
31
|
const canvas = image.encode();
|
|
29
32
|
const url = canvas.toDataURL('jpg');
|
|
30
33
|
//return `<img src="${url}" ></img>`;
|
|
31
34
|
return `<encoded-image src="${url}" key="${image.key}"></encoded-image>`;
|
|
35
|
+
break;
|
|
32
36
|
case 'head':
|
|
33
37
|
const att = node.attrs?' '+node.attrs.map((attr)=>`${attr.name}="${attr.value}"`).join(' '):'';
|
|
34
38
|
return `<${node.tagName}${att}>${content}<script type="importmap">
|
|
@@ -47,7 +51,8 @@ export const transformHTML = async (node, idx)=>{
|
|
|
47
51
|
</script>
|
|
48
52
|
<script type="module">
|
|
49
53
|
import 'nevermore/encoded-image-component';
|
|
50
|
-
</script></${node.tagName}
|
|
54
|
+
</script></${node.tagName}>`;
|
|
55
|
+
break;
|
|
51
56
|
default:
|
|
52
57
|
const attrs = node.attrs?' '+node.attrs.map((attr)=>`${attr.name}="${attr.value}"`).join(' '):'';
|
|
53
58
|
if(node.tagName) return `<${node.tagName}${attrs}>${content}</${node.tagName}>`
|
package/src/image.mjs
CHANGED
|
@@ -79,60 +79,63 @@ export class NevermoreImage{
|
|
|
79
79
|
)) throw new Error('a set of masks are required');
|
|
80
80
|
this.textureDictionary = options.dictionary;
|
|
81
81
|
this.ready = new Promise(async (resolve, reject)=>{
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
const
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
resolve
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
82
|
+
try{
|
|
83
|
+
// load all masks
|
|
84
|
+
// TODO: load from cache
|
|
85
|
+
let masks = options.masks;
|
|
86
|
+
const textureWork = [];
|
|
87
|
+
const textureDictionary = {};
|
|
88
|
+
if(options.maskDir){
|
|
89
|
+
const types = ['jpg', 'jpeg', 'gif', 'png'];
|
|
90
|
+
const files = (await File.list(options.maskDir)).filter((name)=>{
|
|
91
|
+
const parts = name.split('.');
|
|
92
|
+
const ext = parts.pop().toLowerCase();
|
|
93
|
+
return types.indexOf(ext) !== -1;
|
|
94
|
+
// TODO: support mime through magic numbers
|
|
95
|
+
});
|
|
96
|
+
const fileLoads = [];
|
|
97
|
+
for(let lcv=0; lcv<files.length; lcv++){
|
|
98
|
+
const pth = new Path(files[lcv]);
|
|
99
|
+
if(!pth.parsed) throw new Error('path not parsed');
|
|
100
|
+
const parsed = pth.parsed.posix || pth.parsed.win32;
|
|
101
|
+
const canvasLoad = Canvas.load(
|
|
102
|
+
Path.join(options.maskDir, files[lcv])
|
|
103
|
+
);
|
|
104
|
+
textureWork.push(new Promise(async (resolve)=>{
|
|
105
|
+
resolve({
|
|
106
|
+
name: parsed.name,
|
|
107
|
+
canvas: await canvasLoad
|
|
108
|
+
});
|
|
109
|
+
}))
|
|
110
|
+
fileLoads.push(Canvas.load(Path.join(options.maskDir, files[lcv])));
|
|
111
|
+
}
|
|
112
|
+
masks = await Promise.all(fileLoads);
|
|
113
|
+
const texturesLoaded = await Promise.all(textureWork);
|
|
114
|
+
for(let lcv=0; lcv<texturesLoaded.length; lcv++){
|
|
115
|
+
textureDictionary[
|
|
116
|
+
texturesLoaded[lcv].name
|
|
117
|
+
] = texturesLoaded[lcv].canvas;
|
|
118
|
+
};
|
|
119
|
+
this.textureDictionary = textureDictionary;
|
|
110
120
|
}
|
|
111
|
-
masks =
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
'no base image provided to Image File'
|
|
129
|
-
));
|
|
121
|
+
this.masks = masks;
|
|
122
|
+
this.canvas = options.canvas;
|
|
123
|
+
// masks loaded, now load the base image
|
|
124
|
+
if(options.url){
|
|
125
|
+
this.canvas = await Canvas.load(options.url);
|
|
126
|
+
}
|
|
127
|
+
if(!this.canvas){
|
|
128
|
+
return reject(new Error(
|
|
129
|
+
'no base image provided to Image File'
|
|
130
|
+
));
|
|
131
|
+
}
|
|
132
|
+
//make an id
|
|
133
|
+
this.key = options.key || `${makeKey(5)}-${makeKey(5)}-${makeKey(5)}`;
|
|
134
|
+
//we're ready to do work
|
|
135
|
+
resolve();
|
|
136
|
+
}catch(ex){
|
|
137
|
+
reject(ex);
|
|
130
138
|
}
|
|
131
|
-
//make an id
|
|
132
|
-
this.key = options.key || `${makeKey(5)}-${makeKey(5)}-${makeKey(5)}`;
|
|
133
|
-
//console.log('KEY:', this.key);
|
|
134
|
-
//we're ready to do work
|
|
135
|
-
resolve();
|
|
136
139
|
});
|
|
137
140
|
}
|
|
138
141
|
|
package/src/index.mjs
CHANGED
|
@@ -71,26 +71,26 @@ export const computeIndexKeys = async (textBody)=>{
|
|
|
71
71
|
const parser = new ParseEnglish();
|
|
72
72
|
const node = parser.parse(textBody);
|
|
73
73
|
const index = {};
|
|
74
|
+
const skip = ['.']
|
|
74
75
|
await traverse(node, async (thisNode)=>{
|
|
75
76
|
let res = null;
|
|
76
|
-
if(thisNode.value){
|
|
77
|
+
if(thisNode.value && thisNode.type === 'TextNode'){
|
|
78
|
+
//if(skip.indexOf() !== -1) return;
|
|
77
79
|
try{
|
|
78
80
|
res = await define(thisNode.value);
|
|
79
|
-
}catch(ex){}
|
|
81
|
+
}catch(ex){ }
|
|
80
82
|
}
|
|
81
83
|
if(thisNode.type === 'TextNode' && res){
|
|
82
84
|
thisNode.word = res.word;
|
|
83
85
|
thisNode.types = res.types;
|
|
84
86
|
thisNode.definitions = res.definitions;
|
|
85
87
|
thisNode.synonyms = res.synonyms;
|
|
86
|
-
//const thesres = await thesaurus.lookup(thisNode.word);
|
|
87
88
|
if(
|
|
88
89
|
thisNode.types.indexOf('verb') !== -1 ||
|
|
89
90
|
thisNode.types.indexOf('noun') !== -1
|
|
90
91
|
){
|
|
91
92
|
thisNode.replacement = await randomWord();
|
|
92
93
|
index[thisNode.word] = thisNode.replacement;
|
|
93
|
-
//index[thisNode.word] = thisNode.synonyms[0];
|
|
94
94
|
}
|
|
95
95
|
}
|
|
96
96
|
});
|