google-img-scrap 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -6
- package/README.md +122 -122
- package/package.json +6 -2
- package/src/back/google-img-scrap-1.0.7-.js +184 -0
- package/src/constant/GOOGLE_CONSTANT.js +10 -10
- package/src/constant/extensions/IMAGES_EXTENSIONS.json +1 -1
- package/src/constant/query/GOOGLE_COLORS.js +15 -13
- package/src/constant/query/GOOGLE_PARAMS.js +15 -7
- package/src/constant/query/GOOGLE_QUERY.js +46 -37
- package/src/google-img-scrap.js +54 -20
- package/src/utils/UTILS.js +13 -17
- package/test/test-filter-titles.js +14 -14
- package/test/test-last-version.js +19 -0
- package/test/test-simple.js +11 -0
- package/test/test-url-match.js +11 -14
- package/test/test-wallpaper.js +19 -15
- package/test/test.js +22 -22
- package/types/index.d.ts +15 -3
- package/src/constant/translator/TRANSLATOR.js +0 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,27 +1,39 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
### 1.0.8
|
|
4
|
+
|
|
5
|
+
- Fixed "ERROR: Cannot assign to "queryName" because it is a constant" (by GaspardCulis)
|
|
6
|
+
- Removed gstatic url
|
|
7
|
+
- Added average color, id, title and originalUrl
|
|
8
|
+
|
|
3
9
|
### 1.0.7
|
|
10
|
+
|
|
4
11
|
- Readme update
|
|
5
12
|
|
|
6
13
|
### 1.0.6
|
|
14
|
+
|
|
7
15
|
- Fixed types
|
|
8
|
-
- Added
|
|
16
|
+
- Added `limit` to limit the size of the results
|
|
9
17
|
|
|
10
18
|
### 1.0.5
|
|
19
|
+
|
|
11
20
|
- Added types (by christophe77)
|
|
12
21
|
|
|
13
22
|
### v1.0.4
|
|
14
23
|
|
|
15
|
-
- New option
|
|
16
|
-
- New option
|
|
24
|
+
- New option `urlMatch`. You can now get images whose URL matches a string (example: "cdn")
|
|
25
|
+
- New option `filterByTitles`. Filter images by titles
|
|
17
26
|
|
|
18
27
|
### v1.0.3
|
|
19
|
-
|
|
28
|
+
|
|
29
|
+
- New option `execute`. Allows you to run a function on each result, for example to remove "gstatic.com" domains
|
|
20
30
|
|
|
21
31
|
### v1.0.2
|
|
32
|
+
|
|
22
33
|
- Cannot set 'domains' and 'excludeDomains' at the same time
|
|
23
34
|
- Fixed some bugs
|
|
24
|
-
- New option
|
|
35
|
+
- New option `excludeWords`
|
|
25
36
|
|
|
26
37
|
### v1.0.1
|
|
27
|
-
|
|
38
|
+
|
|
39
|
+
- Added the missing dependency
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
# Google-img-scrap
|
|
1
|
+
# Google-img-scrap
|
|
2
2
|
|
|
3
|
-
Scrap images from google images with customs pre filled options
|
|
3
|
+
Scrap images from google images with customs pre filled dorking options
|
|
4
4
|
|
|
5
5
|
## Update
|
|
6
6
|
|
|
@@ -19,22 +19,22 @@ npm i google-img-scrap
|
|
|
19
19
|
## Import
|
|
20
20
|
|
|
21
21
|
```js
|
|
22
|
-
const { GOOGLE_IMG_SCRAP
|
|
22
|
+
const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("google-img-scrap");
|
|
23
23
|
```
|
|
24
24
|
|
|
25
25
|
## Query Params
|
|
26
26
|
|
|
27
|
-
- "search"
|
|
28
|
-
- "execute" (
|
|
29
|
-
- "excludeWords"
|
|
30
|
-
- "domains"
|
|
31
|
-
- "excludeDomains"
|
|
32
|
-
- "safeSearch"
|
|
33
|
-
- "custom"
|
|
34
|
-
- "urlMatch"
|
|
35
|
-
- "filterByTitles"
|
|
36
|
-
- "query"
|
|
37
|
-
- "limit"
|
|
27
|
+
- "search" `string` what you want to search
|
|
28
|
+
- "execute" `(element: FinalResult) => FinalResult | undefined` allows you to execute a function to filter results
|
|
29
|
+
- "excludeWords" `string[]` exclude some words from the search
|
|
30
|
+
- "domains" `string[]` filter by domains
|
|
31
|
+
- "excludeDomains" `string[]` exclude some domains
|
|
32
|
+
- "safeSearch" `boolean` enable or disable safe search (for example, to filter out NSFW results)
|
|
33
|
+
- "custom" `string` add extra query
|
|
34
|
+
- "urlMatch" `string[][]` get images whose URL matches a string (example: "cdn") | `example below`
|
|
35
|
+
- "filterByTitles" `string[][]` filter images by titles | `example below`
|
|
36
|
+
- "query" `GoogleQuery` set a query (can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION]) (use GOOGLE_QUERY items) | `example below`
|
|
37
|
+
- "limit" `number` to limit the size of the results
|
|
38
38
|
|
|
39
39
|
## Result
|
|
40
40
|
|
|
@@ -43,60 +43,60 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
|
|
|
43
43
|
url: 'https://images.google.com/search?tbm=isch&tbs=itp:clipart,qdr:y,ic:gray,isz:l,il:ol,ift:jpg&q=cats%20%20%20-%22black%22%20-%22white%22&name=content&name2=content2',
|
|
44
44
|
result: [
|
|
45
45
|
{
|
|
46
|
+
id: "HA6fW6faerBfPM",
|
|
47
|
+
title: "CAT eating a fish",
|
|
48
|
+
originalUrl: "https://media.gettyimages.com/vectors/cat-article.html",
|
|
46
49
|
url: 'https://media.gettyimages.com/vectors/cat-eating-fish-vector-id1216628506',
|
|
47
|
-
|
|
48
|
-
|
|
50
|
+
averageColor: "rgb(241, 25, 60)",
|
|
51
|
+
averageColorObject: { r: 241, g: 25, b: 60},
|
|
52
|
+
height: 1024,
|
|
53
|
+
width: 1024
|
|
49
54
|
},
|
|
50
55
|
{
|
|
56
|
+
id: "OPSfyUtrsrYUI",
|
|
57
|
+
title: "Cat",
|
|
58
|
+
originalUrl: "https://www.ariatrade.gr/images/products/2021/10/article.html",
|
|
51
59
|
url: 'https://www.ariatrade.gr/images/products/2021/10/110294_1.jpg',
|
|
52
|
-
|
|
53
|
-
|
|
60
|
+
averageColor: "rgb(201, 250, 65)",
|
|
61
|
+
averageColorObject: { r: 201, g: 250, b: 65},
|
|
62
|
+
height: 768,
|
|
63
|
+
width: 1024
|
|
54
64
|
},
|
|
55
|
-
|
|
56
|
-
url: 'https://media.gettyimages.com/illustrations/panther-leaping-illustration-id152406879?s=2048x2048',
|
|
57
|
-
height: '2048',
|
|
58
|
-
width: '2048'
|
|
59
|
-
},
|
|
60
|
-
{
|
|
61
|
-
url: 'https://media.gettyimages.com/illustrations/botany-plants-antique-engraving-illustration-erythrina-variegata-illustration-id970781520',
|
|
62
|
-
height: '1024',
|
|
63
|
-
width: '828'
|
|
64
|
-
}
|
|
65
|
+
...
|
|
65
66
|
]
|
|
66
|
-
...
|
|
67
67
|
}
|
|
68
68
|
```
|
|
69
69
|
|
|
70
70
|
## How to use ?
|
|
71
71
|
|
|
72
|
-
|
|
72
|
+
**NOTE**: For the query parameter you need to set the name in upper case !
|
|
73
73
|
|
|
74
74
|
## Simple example
|
|
75
75
|
|
|
76
76
|
Search cats images
|
|
77
77
|
|
|
78
78
|
```js
|
|
79
|
-
(async function(){
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
79
|
+
(async function () {
|
|
80
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
81
|
+
search: "cats",
|
|
82
|
+
});
|
|
83
83
|
|
|
84
|
-
|
|
84
|
+
console.log(test);
|
|
85
85
|
})();
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
-
##
|
|
88
|
+
## Filtering
|
|
89
89
|
|
|
90
90
|
```js
|
|
91
|
-
(async function(){
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
91
|
+
(async function () {
|
|
92
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
93
|
+
search: "demon slayer background hd",
|
|
94
|
+
execute: function (element) {
|
|
95
|
+
if (element.url.length < 20) return element;
|
|
96
|
+
},
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
console.log(test);
|
|
100
100
|
})();
|
|
101
101
|
```
|
|
102
102
|
|
|
@@ -105,33 +105,33 @@ Search cats images
|
|
|
105
105
|
All query options are optional (see below for all the options)
|
|
106
106
|
|
|
107
107
|
```js
|
|
108
|
-
(async function(){
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
108
|
+
(async function () {
|
|
109
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
110
|
+
search: "cats",
|
|
111
|
+
query: {
|
|
112
|
+
TYPE: GOOGLE_QUERY.TYPE.CLIPART,
|
|
113
|
+
DATE: GOOGLE_QUERY.DATE.YEAR,
|
|
114
|
+
COLOR: GOOGLE_QUERY.COLOR.BLACK_AND_WHITE,
|
|
115
|
+
SIZE: GOOGLE_QUERY.SIZE.LARGE,
|
|
116
|
+
LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
|
|
117
|
+
EXTENSION: GOOGLE_QUERY.EXTENSION.JPG,
|
|
118
|
+
},
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
console.log(test);
|
|
122
122
|
})();
|
|
123
123
|
```
|
|
124
124
|
|
|
125
125
|
## Limit result size
|
|
126
126
|
|
|
127
127
|
```js
|
|
128
|
-
(async function(){
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
128
|
+
(async function () {
|
|
129
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
130
|
+
search: "cats",
|
|
131
|
+
limit: 5,
|
|
132
|
+
});
|
|
133
133
|
|
|
134
|
-
|
|
134
|
+
console.log(test);
|
|
135
135
|
})();
|
|
136
136
|
```
|
|
137
137
|
|
|
@@ -140,26 +140,26 @@ All query options are optional (see below for all the options)
|
|
|
140
140
|
Only scrap from a specific domain
|
|
141
141
|
|
|
142
142
|
```js
|
|
143
|
-
(async function(){
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
143
|
+
(async function () {
|
|
144
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
145
|
+
search: "cats",
|
|
146
|
+
domains: ["alamy.com", "istockphoto.com", "vecteezy.com"],
|
|
147
|
+
});
|
|
148
148
|
|
|
149
|
-
|
|
149
|
+
console.log(test);
|
|
150
150
|
})();
|
|
151
151
|
```
|
|
152
152
|
|
|
153
153
|
## Exclude domains
|
|
154
154
|
|
|
155
155
|
```js
|
|
156
|
-
(async function(){
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
156
|
+
(async function () {
|
|
157
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
158
|
+
search: "cats",
|
|
159
|
+
excludeDomains: ["istockphoto.com", "alamy.com"],
|
|
160
|
+
});
|
|
161
161
|
|
|
162
|
-
|
|
162
|
+
console.log(test);
|
|
163
163
|
})();
|
|
164
164
|
```
|
|
165
165
|
|
|
@@ -168,39 +168,39 @@ Only scrap from a specific domain
|
|
|
168
168
|
If you don't like black cats and white cats
|
|
169
169
|
|
|
170
170
|
```js
|
|
171
|
-
(async function(){
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
171
|
+
(async function () {
|
|
172
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
173
|
+
search: "cats",
|
|
174
|
+
excludeWords: ["black", "white"], //If you don't like black cats and white cats
|
|
175
|
+
});
|
|
176
176
|
|
|
177
|
-
|
|
177
|
+
console.log(test);
|
|
178
178
|
})();
|
|
179
179
|
```
|
|
180
180
|
|
|
181
181
|
## Safe search (no nsfw)
|
|
182
182
|
|
|
183
183
|
```js
|
|
184
|
-
(async function(){
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
184
|
+
(async function () {
|
|
185
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
186
|
+
search: "cats",
|
|
187
|
+
safeSearch: false,
|
|
188
|
+
});
|
|
189
189
|
|
|
190
|
-
|
|
190
|
+
console.log(test);
|
|
191
191
|
})();
|
|
192
192
|
```
|
|
193
193
|
|
|
194
194
|
## Custom query params
|
|
195
195
|
|
|
196
196
|
```js
|
|
197
|
-
(async function(){
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
197
|
+
(async function () {
|
|
198
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
199
|
+
search: "cats",
|
|
200
|
+
custom: "name=content&name2=content2",
|
|
201
|
+
});
|
|
202
202
|
|
|
203
|
-
|
|
203
|
+
console.log(test);
|
|
204
204
|
})();
|
|
205
205
|
```
|
|
206
206
|
|
|
@@ -209,17 +209,17 @@ If you don' like black cats and white cats
|
|
|
209
209
|
- urlMatch works like filterByTitles
|
|
210
210
|
|
|
211
211
|
```js
|
|
212
|
-
(async function(){
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
212
|
+
(async function () {
|
|
213
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
214
|
+
search: "cats",
|
|
215
|
+
//will build something like this "(draw and white) or (albino and white)"
|
|
216
|
+
filterByTitles: [
|
|
217
|
+
["draw", "white"],
|
|
218
|
+
["albino", "white"],
|
|
219
|
+
],
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
console.log(test);
|
|
223
223
|
})();
|
|
224
224
|
```
|
|
225
225
|
|
|
@@ -227,10 +227,10 @@ If you don' like black cats and white cats
|
|
|
227
227
|
|
|
228
228
|
```js
|
|
229
229
|
{
|
|
230
|
-
SIZE: {
|
|
231
|
-
LARGE,
|
|
232
|
-
MEDIUM,
|
|
233
|
-
ICON
|
|
230
|
+
SIZE: {
|
|
231
|
+
LARGE,
|
|
232
|
+
MEDIUM,
|
|
233
|
+
ICON
|
|
234
234
|
},
|
|
235
235
|
COLOR: {
|
|
236
236
|
BLACK_AND_WHITE,
|
|
@@ -248,10 +248,10 @@ If you don' like black cats and white cats
|
|
|
248
248
|
BLACK,
|
|
249
249
|
BROWN
|
|
250
250
|
},
|
|
251
|
-
TYPE: {
|
|
252
|
-
CLIPART,
|
|
253
|
-
DRAW,
|
|
254
|
-
GIF
|
|
251
|
+
TYPE: {
|
|
252
|
+
CLIPART,
|
|
253
|
+
DRAW,
|
|
254
|
+
GIF
|
|
255
255
|
},
|
|
256
256
|
EXTENSION: {
|
|
257
257
|
JPG,
|
|
@@ -263,15 +263,15 @@ If you don' like black cats and white cats
|
|
|
263
263
|
ICO,
|
|
264
264
|
RAW
|
|
265
265
|
},
|
|
266
|
-
DATE: {
|
|
267
|
-
DAY,
|
|
268
|
-
WEEK,
|
|
269
|
-
MONTH,
|
|
270
|
-
YEAR
|
|
266
|
+
DATE: {
|
|
267
|
+
DAY,
|
|
268
|
+
WEEK,
|
|
269
|
+
MONTH,
|
|
270
|
+
YEAR
|
|
271
271
|
},
|
|
272
|
-
LICENCE: {
|
|
273
|
-
CREATIVE_COMMONS,
|
|
274
|
-
COMMERCIAL_AND_OTHER
|
|
272
|
+
LICENCE: {
|
|
273
|
+
CREATIVE_COMMONS,
|
|
274
|
+
COMMERCIAL_AND_OTHER
|
|
275
275
|
}
|
|
276
276
|
}
|
|
277
277
|
```
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "google-img-scrap",
|
|
3
|
-
"version": "1.0.
|
|
4
|
-
"description": "Scrap images from google images with customs pre filled options",
|
|
3
|
+
"version": "1.0.8",
|
|
4
|
+
"description": "Scrap images from google images with customs pre filled dorking options",
|
|
5
5
|
"main": "./src/google-img-scrap.js",
|
|
6
6
|
"types": "./types/index.d.ts",
|
|
7
7
|
"directories": {
|
|
@@ -34,6 +34,10 @@
|
|
|
34
34
|
{
|
|
35
35
|
"name": "christophe77",
|
|
36
36
|
"url": "https://github.com/christophe77"
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"name": "GaspardCulis",
|
|
40
|
+
"url": "https://github.com/GaspardCulis"
|
|
37
41
|
}
|
|
38
42
|
],
|
|
39
43
|
"license": "MIT",
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
const got = require("got");
|
|
2
|
+
const { FastHTMLParser } = require("fast-html-dom-parser");
|
|
3
|
+
|
|
4
|
+
const { GOOGLE_CONSTANT } = require("../constant/GOOGLE_CONSTANT");
|
|
5
|
+
const { GOOGLE_QUERY } = require("../constant/query/GOOGLE_QUERY");
|
|
6
|
+
const EXTENSIONS = require("../constant/extensions/IMAGES_EXTENSIONS.json");
|
|
7
|
+
|
|
8
|
+
const { buildQuery, unicodeToChar } = require("../utils/UTILS");
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Validation of the arguments passed
|
|
12
|
+
* @param {import("../../types").Config} config
|
|
13
|
+
*/
|
|
14
|
+
function verify(config) {
|
|
15
|
+
if (config.excludeDomains && config.domains)
|
|
16
|
+
throw "Can not set 'excludeDomains' and 'domains' as same times";
|
|
17
|
+
|
|
18
|
+
if (!config.search || config.search.trim() == "")
|
|
19
|
+
throw "'search' can not be empty";
|
|
20
|
+
|
|
21
|
+
if (config.query) {
|
|
22
|
+
const queryToVerify = Object.keys(GOOGLE_QUERY);
|
|
23
|
+
|
|
24
|
+
for (const key of Object.keys(config.query)) {
|
|
25
|
+
if (!queryToVerify.includes(key)) throw `Invalide query name '${key}'`;
|
|
26
|
+
|
|
27
|
+
const VALUES = Object.values(GOOGLE_QUERY[key]);
|
|
28
|
+
const ACTUAL_VALUE = config.query[key];
|
|
29
|
+
if (!VALUES.includes(ACTUAL_VALUE))
|
|
30
|
+
throw `'${ACTUAL_VALUE}' is not a valide argument for the query : '${key}'`;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Verify the url is an image
|
|
37
|
+
* @param {string} content
|
|
38
|
+
* @returns {boolean}
|
|
39
|
+
*/
|
|
40
|
+
function containImage(content = "") {
|
|
41
|
+
return EXTENSIONS.some((extension) => content.includes(extension));
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
*Parse the html from google image to get the images links
|
|
46
|
+
* @param {string} url
|
|
47
|
+
* @returns {import("../../types").FinalResult[]}
|
|
48
|
+
*/
|
|
49
|
+
async function parse(url) {
|
|
50
|
+
const result = [];
|
|
51
|
+
|
|
52
|
+
const response = await got(url, {
|
|
53
|
+
headers: GOOGLE_CONSTANT.headers,
|
|
54
|
+
});
|
|
55
|
+
const parser = new FastHTMLParser(response.body);
|
|
56
|
+
|
|
57
|
+
const scripts = parser.getElementsByTagName("script");
|
|
58
|
+
|
|
59
|
+
if (!scripts) return result;
|
|
60
|
+
|
|
61
|
+
for (const script of scripts) {
|
|
62
|
+
const body = script.innerHTML;
|
|
63
|
+
|
|
64
|
+
const valide = containImage(body);
|
|
65
|
+
|
|
66
|
+
if (valide) {
|
|
67
|
+
const regex = /\["(http.+?)",(\d+),(\d+)\]/gi;
|
|
68
|
+
|
|
69
|
+
let res = null;
|
|
70
|
+
|
|
71
|
+
while ((res = regex.exec(body)) != null) {
|
|
72
|
+
if (res.length >= 4 && res[1].match(/http/gi).length < 2)
|
|
73
|
+
result.push({
|
|
74
|
+
url: unicodeToChar(res[1]),
|
|
75
|
+
height: res[2],
|
|
76
|
+
width: res[3],
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return result;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Main function to build google image dork URL
|
|
87
|
+
* @param {import("../../types").Config} config
|
|
88
|
+
* @returns {import("../../types").Results}
|
|
89
|
+
*/
|
|
90
|
+
async function GOOGLE_IMG_SCRAP(config = {}) {
|
|
91
|
+
verify(config);
|
|
92
|
+
|
|
93
|
+
//exclude domains
|
|
94
|
+
const EXCLUDE_DOMAINS = [];
|
|
95
|
+
if (config.excludeDomains)
|
|
96
|
+
config.excludeDomains.forEach((domain) =>
|
|
97
|
+
EXCLUDE_DOMAINS.push(`-site:"${domain}"`)
|
|
98
|
+
);
|
|
99
|
+
|
|
100
|
+
//domains
|
|
101
|
+
const DOMAINS = [];
|
|
102
|
+
if (config.domains)
|
|
103
|
+
config.domains.forEach((domain) => DOMAINS.push(`site:"${domain}"`));
|
|
104
|
+
|
|
105
|
+
//exclude words
|
|
106
|
+
const EXCLUDE_WORDS = [];
|
|
107
|
+
if (config.excludeWords)
|
|
108
|
+
config.excludeWords.forEach((word) => EXCLUDE_WORDS.push(`-"${word}"`));
|
|
109
|
+
|
|
110
|
+
//filter by titles
|
|
111
|
+
const FILTER_TITLE = [];
|
|
112
|
+
if (config.filterByTitles)
|
|
113
|
+
config.filterByTitles.forEach((titleFilter) => {
|
|
114
|
+
const value = titleFilter.map((title) => {
|
|
115
|
+
return `intitle:"${title}"`;
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
FILTER_TITLE.push(`(${value.join(" AND ")})`);
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
//url match words
|
|
122
|
+
const URL_MATCH = [];
|
|
123
|
+
if (config.urlMatch)
|
|
124
|
+
config.urlMatch.forEach((urlMatch) => {
|
|
125
|
+
const value = urlMatch.map((content) => {
|
|
126
|
+
return `inurl:${content}`;
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
URL_MATCH.push(`(${value.join(" AND ")})`);
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
//building url
|
|
133
|
+
const SEARCH_TERM =
|
|
134
|
+
config.search +
|
|
135
|
+
" " +
|
|
136
|
+
URL_MATCH.join(" OR ") +
|
|
137
|
+
" " +
|
|
138
|
+
FILTER_TITLE.join(" OR ") +
|
|
139
|
+
" " +
|
|
140
|
+
EXCLUDE_WORDS.join(" ") +
|
|
141
|
+
" " +
|
|
142
|
+
EXCLUDE_DOMAINS.join(" ") +
|
|
143
|
+
" " +
|
|
144
|
+
DOMAINS.join(" OR ");
|
|
145
|
+
|
|
146
|
+
const SEARCH = encodeURIComponent(SEARCH_TERM.trim());
|
|
147
|
+
const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, {
|
|
148
|
+
[GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(","),
|
|
149
|
+
q: SEARCH,
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
const CUSTOM_PARAM = config.custom ? `&${config.custom}` : "";
|
|
153
|
+
const SAFE_SEARCH = config.safeSearch ? `&safe=active` : "";
|
|
154
|
+
|
|
155
|
+
const URL =
|
|
156
|
+
GOOGLE_CONSTANT.url + buildQuery(QUERY) + CUSTOM_PARAM + SAFE_SEARCH;
|
|
157
|
+
|
|
158
|
+
//parsing
|
|
159
|
+
const result = await parse(URL);
|
|
160
|
+
|
|
161
|
+
//execute function
|
|
162
|
+
let finalResult = [];
|
|
163
|
+
if (config.execute)
|
|
164
|
+
result.forEach((element) => {
|
|
165
|
+
const value = config.execute(element);
|
|
166
|
+
if (value) finalResult.push(value);
|
|
167
|
+
});
|
|
168
|
+
else finalResult = result;
|
|
169
|
+
|
|
170
|
+
//limit result
|
|
171
|
+
let slicedResult = [];
|
|
172
|
+
const { limit } = config;
|
|
173
|
+
|
|
174
|
+
if (limit && limit > 0 && finalResult.length > limit) {
|
|
175
|
+
slicedResult = finalResult.slice(0, limit);
|
|
176
|
+
}
|
|
177
|
+
//result
|
|
178
|
+
return {
|
|
179
|
+
url: URL,
|
|
180
|
+
result: slicedResult.length > 0 ? slicedResult : finalResult,
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
module.exports = { GOOGLE_IMG_SCRAP, GOOGLE_QUERY };
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
const GOOGLE_CONSTANT = {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
2
|
+
url: "https://images.google.com/search",
|
|
3
|
+
queryParam: "tbs",
|
|
4
|
+
forceGoogleImage: {
|
|
5
|
+
tbm: "isch", //needed to search on google image instead of google
|
|
6
|
+
},
|
|
7
|
+
headers: {
|
|
8
|
+
"User-Agent":
|
|
9
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
|
|
10
|
+
},
|
|
11
11
|
};
|
|
12
12
|
|
|
13
|
-
module.exports = { GOOGLE_CONSTANT };
|
|
13
|
+
module.exports = { GOOGLE_CONSTANT };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
["jpg", "gif", "bmp", "png", "svg", "webp", "ico", "raw"]
|
|
1
|
+
["jpg", "gif", "bmp", "png", "svg", "webp", "ico", "raw"]
|
|
@@ -1,14 +1,16 @@
|
|
|
1
|
-
const COLORS = [
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
1
|
+
const COLORS = [
|
|
2
|
+
"red",
|
|
3
|
+
"blue",
|
|
4
|
+
"purple",
|
|
5
|
+
"orange",
|
|
6
|
+
"yellow",
|
|
7
|
+
"green",
|
|
8
|
+
"teal",
|
|
9
|
+
"pink",
|
|
10
|
+
"white",
|
|
11
|
+
"gray",
|
|
12
|
+
"black",
|
|
13
|
+
"brown",
|
|
14
|
+
];
|
|
13
15
|
|
|
14
|
-
module.exports = { COLORS };
|
|
16
|
+
module.exports = { COLORS };
|
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
const SIZE_PARAM = "isz",
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
2
|
+
COLOR_PARAM = "ic",
|
|
3
|
+
SPECIFIC_COLOR_PARAM = "Cisc",
|
|
4
|
+
TYPE_PARAM = "itp",
|
|
5
|
+
DATE_PARAM = "qdr",
|
|
6
|
+
LICENCE_PARAM = "il",
|
|
7
|
+
IMAGE_EXTENSION_PARAM = "ift";
|
|
8
8
|
|
|
9
|
-
module.exports = {
|
|
9
|
+
module.exports = {
|
|
10
|
+
SIZE_PARAM,
|
|
11
|
+
COLOR_PARAM,
|
|
12
|
+
SPECIFIC_COLOR_PARAM,
|
|
13
|
+
TYPE_PARAM,
|
|
14
|
+
DATE_PARAM,
|
|
15
|
+
LICENCE_PARAM,
|
|
16
|
+
IMAGE_EXTENSION_PARAM,
|
|
17
|
+
};
|
|
@@ -1,50 +1,59 @@
|
|
|
1
|
-
const {
|
|
2
|
-
|
|
1
|
+
const {
|
|
2
|
+
SIZE_PARAM,
|
|
3
|
+
COLOR_PARAM,
|
|
4
|
+
SPECIFIC_COLOR_PARAM,
|
|
5
|
+
TYPE_PARAM,
|
|
6
|
+
DATE_PARAM,
|
|
7
|
+
LICENCE_PARAM,
|
|
8
|
+
IMAGE_EXTENSION_PARAM,
|
|
9
|
+
} = require("./GOOGLE_PARAMS");
|
|
10
|
+
const { COLORS } = require("./GOOGLE_COLORS");
|
|
3
11
|
const EXTENSIONS = require("../extensions/IMAGES_EXTENSIONS.json");
|
|
4
12
|
|
|
5
13
|
const GOOGLE_QUERY = {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
14
|
+
SIZE: {
|
|
15
|
+
LARGE: SIZE_PARAM + ":l",
|
|
16
|
+
MEDIUM: SIZE_PARAM + ":m",
|
|
17
|
+
ICON: SIZE_PARAM + ":i",
|
|
18
|
+
},
|
|
19
|
+
|
|
20
|
+
COLOR: {
|
|
21
|
+
BLACK_AND_WHITE: COLOR_PARAM + ":gray",
|
|
22
|
+
TRANSPARENT: COLOR_PARAM + ":trans",
|
|
23
|
+
},
|
|
24
|
+
|
|
25
|
+
TYPE: {
|
|
26
|
+
CLIPART: TYPE_PARAM + ":clipart",
|
|
27
|
+
DRAW: TYPE_PARAM + ":lineart",
|
|
28
|
+
GIF: TYPE_PARAM + ":animated",
|
|
29
|
+
},
|
|
30
|
+
|
|
31
|
+
EXTENSION: {},
|
|
32
|
+
|
|
33
|
+
DATE: {
|
|
34
|
+
DAY: DATE_PARAM + ":d",
|
|
35
|
+
WEEK: DATE_PARAM + ":w",
|
|
36
|
+
MONTH: DATE_PARAM + ":m",
|
|
37
|
+
YEAR: DATE_PARAM + ":y",
|
|
38
|
+
},
|
|
39
|
+
|
|
40
|
+
LICENCE: {
|
|
41
|
+
CREATIVE_COMMONS: LICENCE_PARAM + ":cl",
|
|
42
|
+
COMMERCIAL_AND_OTHER: LICENCE_PARAM + ":ol",
|
|
43
|
+
},
|
|
36
44
|
};
|
|
37
45
|
|
|
38
46
|
//build extension
|
|
39
47
|
EXTENSIONS.forEach((EXTENSION) => {
|
|
40
|
-
|
|
41
|
-
|
|
48
|
+
const queryName = EXTENSION.toUpperCase();
|
|
49
|
+
GOOGLE_QUERY.EXTENSION[queryName] = IMAGE_EXTENSION_PARAM + ":" + EXTENSION;
|
|
42
50
|
});
|
|
43
51
|
|
|
44
52
|
//build colors
|
|
45
53
|
COLORS.forEach((COLOR) => {
|
|
46
|
-
|
|
47
|
-
|
|
54
|
+
const queryName = COLOR.toUpperCase();
|
|
55
|
+
GOOGLE_QUERY.COLOR[queryName] =
|
|
56
|
+
COLOR_PARAM + ":specific," + SPECIFIC_COLOR_PARAM + ":" + COLOR;
|
|
48
57
|
});
|
|
49
58
|
|
|
50
|
-
module.exports = { GOOGLE_QUERY };
|
|
59
|
+
module.exports = { GOOGLE_QUERY };
|
package/src/google-img-scrap.js
CHANGED
|
@@ -3,12 +3,14 @@ const { FastHTMLParser } = require("fast-html-dom-parser");
|
|
|
3
3
|
|
|
4
4
|
const { GOOGLE_CONSTANT } = require("./constant/GOOGLE_CONSTANT");
|
|
5
5
|
const { GOOGLE_QUERY } = require("./constant/query/GOOGLE_QUERY");
|
|
6
|
-
const { TRANSLATOR } = require("./constant/translator/TRANSLATOR");
|
|
7
6
|
const EXTENSIONS = require("./constant/extensions/IMAGES_EXTENSIONS.json");
|
|
8
7
|
|
|
9
8
|
const { buildQuery, unicodeToChar } = require("./utils/UTILS");
|
|
10
9
|
|
|
11
|
-
|
|
10
|
+
/**
|
|
11
|
+
* Validation of the arguments passed
|
|
12
|
+
* @param {import("../types").Config} config
|
|
13
|
+
*/
|
|
12
14
|
function verify(config) {
|
|
13
15
|
if (config.excludeDomains && config.domains)
|
|
14
16
|
throw "Can not set 'excludeDomains' and 'domains' as same times";
|
|
@@ -30,16 +32,20 @@ function verify(config) {
|
|
|
30
32
|
}
|
|
31
33
|
}
|
|
32
34
|
|
|
33
|
-
|
|
35
|
+
/**
|
|
36
|
+
* Verify the url is an image
|
|
37
|
+
* @param {string} content
|
|
38
|
+
* @returns {boolean}
|
|
39
|
+
*/
|
|
34
40
|
function containImage(content = "") {
|
|
35
|
-
|
|
36
|
-
if (content.includes(EXTENSION)) return true;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
return false;
|
|
41
|
+
return EXTENSIONS.some((extension) => content.includes(extension));
|
|
40
42
|
}
|
|
41
43
|
|
|
42
|
-
|
|
44
|
+
/**
|
|
45
|
+
*Parse the html from google image to get the images links
|
|
46
|
+
* @param {string} url
|
|
47
|
+
* @returns {import("../types").FinalResult[]}
|
|
48
|
+
*/
|
|
43
49
|
async function parse(url) {
|
|
44
50
|
const result = [];
|
|
45
51
|
|
|
@@ -58,17 +64,45 @@ async function parse(url) {
|
|
|
58
64
|
const valide = containImage(body);
|
|
59
65
|
|
|
60
66
|
if (valide) {
|
|
61
|
-
|
|
67
|
+
//getting image url, height, width, average
|
|
68
|
+
const regex =
|
|
69
|
+
/\["(http[^"]+?)",(\d+),(\d+)\],[\w\d]+?,[\w\d]+?,"rgb\((\d+),(\d+),(\d+)\)"/gi;
|
|
70
|
+
|
|
71
|
+
//getting originalUrl, title, id
|
|
72
|
+
const secondRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi;
|
|
62
73
|
|
|
63
74
|
let res = null;
|
|
75
|
+
let secondRes = null;
|
|
76
|
+
|
|
77
|
+
while (
|
|
78
|
+
(res = regex.exec(body)) != null &&
|
|
79
|
+
(secondRes = secondRegex.exec(body)) != null
|
|
80
|
+
) {
|
|
81
|
+
if (
|
|
82
|
+
res.length >= 4 &&
|
|
83
|
+
res[1].match(/http/gi).length < 2 &&
|
|
84
|
+
secondRes.length === 4 &&
|
|
85
|
+
secondRes[2].match(/http/gi).length < 2
|
|
86
|
+
) {
|
|
87
|
+
const [r, g, b] = [res[4], res[5], res[6]].map((e) =>
|
|
88
|
+
parseInt(e, 10)
|
|
89
|
+
);
|
|
64
90
|
|
|
65
|
-
while ((res = regex.exec(body)) != null) {
|
|
66
|
-
if (res.length >= 4 && res[1].match(/http/gi).length < 2)
|
|
67
91
|
result.push({
|
|
92
|
+
id: secondRes[1],
|
|
93
|
+
title: secondRes[3],
|
|
68
94
|
url: unicodeToChar(res[1]),
|
|
69
|
-
|
|
70
|
-
|
|
95
|
+
originalUrl: unicodeToChar(secondRes[2]),
|
|
96
|
+
averageColor: `rgb(${r}, ${g}, ${b})`,
|
|
97
|
+
averageColorObject: {
|
|
98
|
+
r,
|
|
99
|
+
g,
|
|
100
|
+
b,
|
|
101
|
+
},
|
|
102
|
+
height: parseInt(res[2], 10),
|
|
103
|
+
width: parseInt(res[3], 10),
|
|
71
104
|
});
|
|
105
|
+
}
|
|
72
106
|
}
|
|
73
107
|
}
|
|
74
108
|
}
|
|
@@ -76,9 +110,12 @@ async function parse(url) {
|
|
|
76
110
|
return result;
|
|
77
111
|
}
|
|
78
112
|
|
|
79
|
-
|
|
113
|
+
/**
|
|
114
|
+
* Main function to build google image dork URL
|
|
115
|
+
* @param {import("../types").Config} config
|
|
116
|
+
* @returns {import("../types").Results}
|
|
117
|
+
*/
|
|
80
118
|
async function GOOGLE_IMG_SCRAP(config = {}) {
|
|
81
|
-
//verify config
|
|
82
119
|
verify(config);
|
|
83
120
|
|
|
84
121
|
//exclude domains
|
|
@@ -144,10 +181,7 @@ async function GOOGLE_IMG_SCRAP(config = {}) {
|
|
|
144
181
|
const SAFE_SEARCH = config.safeSearch ? `&safe=active` : "";
|
|
145
182
|
|
|
146
183
|
const URL =
|
|
147
|
-
GOOGLE_CONSTANT.url +
|
|
148
|
-
buildQuery(QUERY, TRANSLATOR) +
|
|
149
|
-
CUSTOM_PARAM +
|
|
150
|
-
SAFE_SEARCH;
|
|
184
|
+
GOOGLE_CONSTANT.url + buildQuery(QUERY) + CUSTOM_PARAM + SAFE_SEARCH;
|
|
151
185
|
|
|
152
186
|
//parsing
|
|
153
187
|
const result = await parse(URL);
|
package/src/utils/UTILS.js
CHANGED
|
@@ -1,24 +1,20 @@
|
|
|
1
|
-
function buildQuery(query
|
|
2
|
-
|
|
1
|
+
function buildQuery(query) {
|
|
2
|
+
const result = [];
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
const toTranslate = Object.keys(translator);
|
|
4
|
+
const params = Object.keys(query);
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
for (const param of params) {
|
|
7
|
+
const queryName = param;
|
|
8
|
+
result.push(`${queryName}=${query[param]}`);
|
|
9
|
+
}
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
return "?" + result.join('&');
|
|
11
|
+
return "?" + result.join("&");
|
|
15
12
|
}
|
|
16
13
|
|
|
17
14
|
function unicodeToChar(text) {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
}
|
|
15
|
+
return text.replace(/\\u[\dA-F]{4}/gi, function (match) {
|
|
16
|
+
return String.fromCharCode(parseInt(match.replace(/\\u/g, ""), 16));
|
|
17
|
+
});
|
|
18
|
+
}
|
|
23
19
|
|
|
24
|
-
module.exports = { buildQuery, unicodeToChar };
|
|
20
|
+
module.exports = { buildQuery, unicodeToChar };
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
const { GOOGLE_IMG_SCRAP } = require(
|
|
1
|
+
const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap");
|
|
2
2
|
|
|
3
|
-
(async function(){
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
3
|
+
(async function () {
|
|
4
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
5
|
+
search: "cats",
|
|
6
|
+
filterByTitles: [
|
|
7
|
+
["draw", "white"],
|
|
8
|
+
["albino", "white"],
|
|
9
|
+
],
|
|
10
|
+
execute: function (element) {
|
|
11
|
+
if (!element.url.match("gstatic.com")) return element;
|
|
12
|
+
},
|
|
13
|
+
});
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
})();
|
|
15
|
+
console.log(test, test.result.length);
|
|
16
|
+
})();
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap");
|
|
2
|
+
const V107 = require("../src/back/google-img-scrap-1.0.7-.js");
|
|
3
|
+
|
|
4
|
+
// console.log(GOOGLE_QUERY);
|
|
5
|
+
|
|
6
|
+
(async function () {
|
|
7
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
8
|
+
search: "cats",
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
const test2 = await V107.GOOGLE_IMG_SCRAP({
|
|
12
|
+
search: "cats",
|
|
13
|
+
execute: function (element) {
|
|
14
|
+
if (!element.url.match("gstatic.com")) return element;
|
|
15
|
+
},
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
console.log(test.result.length, test2.result.length);
|
|
19
|
+
})();
|
package/test/test-url-match.js
CHANGED
|
@@ -1,16 +1,13 @@
|
|
|
1
|
-
const { GOOGLE_IMG_SCRAP } = require(
|
|
1
|
+
const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap");
|
|
2
2
|
|
|
3
|
-
(async function(){
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
if(!element.url.match('gstatic.com')) return element;
|
|
12
|
-
}
|
|
13
|
-
});
|
|
3
|
+
(async function () {
|
|
4
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
5
|
+
search: "cats",
|
|
6
|
+
urlMatch: [["cdn"], ["istockphoto"]],
|
|
7
|
+
execute: function (element) {
|
|
8
|
+
if (!element.url.match("gstatic.com")) return element;
|
|
9
|
+
},
|
|
10
|
+
});
|
|
14
11
|
|
|
15
|
-
|
|
16
|
-
})();
|
|
12
|
+
console.log(test, test.result.length);
|
|
13
|
+
})();
|
package/test/test-wallpaper.js
CHANGED
|
@@ -1,17 +1,21 @@
|
|
|
1
|
-
const { GOOGLE_IMG_SCRAP
|
|
1
|
+
const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap");
|
|
2
2
|
|
|
3
|
-
(async function(){
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
3
|
+
(async function () {
|
|
4
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
5
|
+
search: "demon slayer background hd",
|
|
6
|
+
query: {
|
|
7
|
+
SIZE: GOOGLE_QUERY.SIZE.LARGE,
|
|
8
|
+
},
|
|
9
|
+
domains: ["alphacoders.com"],
|
|
10
|
+
safeSearch: false,
|
|
11
|
+
execute: function (element) {
|
|
12
|
+
if (!element.url.match("gstatic.com")) return element;
|
|
13
|
+
},
|
|
14
|
+
});
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
console.log(
|
|
17
|
+
test,
|
|
18
|
+
test.result[test.result.length - 1].url,
|
|
19
|
+
test.result.length
|
|
20
|
+
);
|
|
21
|
+
})();
|
package/test/test.js
CHANGED
|
@@ -1,26 +1,26 @@
|
|
|
1
|
-
const { GOOGLE_IMG_SCRAP
|
|
1
|
+
const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap");
|
|
2
2
|
|
|
3
3
|
// console.log(GOOGLE_QUERY);
|
|
4
4
|
|
|
5
|
-
(async function(){
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
5
|
+
(async function () {
|
|
6
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
7
|
+
search: "cats",
|
|
8
|
+
query: {
|
|
9
|
+
TYPE: GOOGLE_QUERY.TYPE.CLIPART,
|
|
10
|
+
DATE: GOOGLE_QUERY.DATE.YEAR,
|
|
11
|
+
COLOR: GOOGLE_QUERY.COLOR.BLACK_AND_WHITE,
|
|
12
|
+
SIZE: GOOGLE_QUERY.SIZE.LARGE,
|
|
13
|
+
LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
|
|
14
|
+
EXTENSION: GOOGLE_QUERY.EXTENSION.JPG,
|
|
15
|
+
},
|
|
16
|
+
excludeWords: ["black", "white"], //If you don't like black and white cats
|
|
17
|
+
custom: "name=content&name2=content2",
|
|
18
|
+
safeSearch: false,
|
|
19
|
+
execute: function (element) {
|
|
20
|
+
if (!element.url.match("gstatic.com")) return element;
|
|
21
|
+
},
|
|
22
|
+
// excludeDomains: ["istockphoto.com", "alamy.com"]
|
|
23
|
+
});
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
})();
|
|
25
|
+
console.log(test, test.result.length);
|
|
26
|
+
})();
|
package/types/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
type Config = {
|
|
2
2
|
search: string;
|
|
3
|
-
limit?:number;
|
|
3
|
+
limit?: number;
|
|
4
4
|
query?: {
|
|
5
5
|
TYPE?: string;
|
|
6
6
|
DATE?: string;
|
|
@@ -17,15 +17,27 @@ type Config = {
|
|
|
17
17
|
execute?: (element: FinalResult) => FinalResult | undefined;
|
|
18
18
|
filterByTitles?: [string[]];
|
|
19
19
|
};
|
|
20
|
+
|
|
20
21
|
type FinalResult = {
|
|
22
|
+
id: string;
|
|
23
|
+
title: string;
|
|
24
|
+
originalUrl: string;
|
|
21
25
|
url: string;
|
|
22
|
-
|
|
23
|
-
|
|
26
|
+
averageColor: string;
|
|
27
|
+
averageColorObject: {
|
|
28
|
+
r: number;
|
|
29
|
+
g: number;
|
|
30
|
+
b: number;
|
|
31
|
+
};
|
|
32
|
+
height: number;
|
|
33
|
+
width: number;
|
|
24
34
|
};
|
|
35
|
+
|
|
25
36
|
type Results = {
|
|
26
37
|
url: string;
|
|
27
38
|
result: FinalResult[];
|
|
28
39
|
};
|
|
40
|
+
|
|
29
41
|
type GoogleQuery = {
|
|
30
42
|
SIZE: {
|
|
31
43
|
LARGE: string;
|