google-img-scrap 1.0.4 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -1
- package/README.md +116 -54
- package/package.json +48 -41
- package/src/google-img-scrap.js +144 -116
- package/src/utils/UTILS.js +4 -4
- package/test/test-result-limit.js +25 -0
- package/test/test.js +0 -1
- package/types/index.d.ts +96 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,8 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
### 1.0.7
|
|
4
|
+
- Readme update
|
|
5
|
+
|
|
6
|
+
### 1.0.6
|
|
7
|
+
- Fixed types
|
|
8
|
+
- Added ```limit``` to limit the size of the results
|
|
9
|
+
|
|
10
|
+
### 1.0.5
|
|
11
|
+
- Added types (by christophe77)
|
|
12
|
+
|
|
3
13
|
### v1.0.4
|
|
4
14
|
|
|
5
|
-
- New option ```urlMatch```. You
|
|
15
|
+
- New option ```urlMatch```. You can now get images whose url matches a string (example: "cdn")
|
|
6
16
|
- New option ```filterByTitles```. Filter images by titles
|
|
7
17
|
|
|
8
18
|
### v1.0.3
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
# Google-img-scrap v1.0.
|
|
1
|
+
# Google-img-scrap v1.0.7
|
|
2
2
|
|
|
3
|
-
Scrap images from google
|
|
3
|
+
Scrape images from Google Images with custom pre-filled options
|
|
4
4
|
|
|
5
5
|
## Update
|
|
6
6
|
|
|
@@ -18,67 +18,93 @@ npm i google-img-scrap
|
|
|
18
18
|
|
|
19
19
|
## Import
|
|
20
20
|
|
|
21
|
-
- NPM
|
|
22
|
-
|
|
23
21
|
```js
|
|
24
22
|
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
|
|
25
23
|
```
|
|
26
24
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
```js
|
|
30
|
-
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('./src/google-img-scrap');
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
## Params
|
|
25
|
+
## Query Params
|
|
34
26
|
|
|
35
27
|
- "search" (String) what you want to search
|
|
36
28
|
- "execute" (Function) called on each result; return the element to keep it, or return nothing to drop it (for example to remove "gstatic.com" urls)
|
|
37
|
-
- "excludeWords" (
|
|
38
|
-
- "domains" (
|
|
39
|
-
- "excludeDomains" (
|
|
29
|
+
- "excludeWords" (String[]) exclude some words from the search
|
|
30
|
+
- "domains" (String[]) filter by domains
|
|
31
|
+
- "excludeDomains" (String[]) exclude some domains
|
|
40
32
|
- "safeSearch" (Boolean) set to true to enable safe search (for example to filter out nsfw results)
|
|
41
33
|
- "custom" (String) add extra query
|
|
42
|
-
- "urlMatch" (
|
|
43
|
-
- "filterByTitles" (
|
|
34
|
+
- "urlMatch" (String[][]) get image when an url match a string (example: "cdn") | ```example below```
|
|
35
|
+
- "filterByTitles" (String[][]) filter images by titles | ```example below```
|
|
44
36
|
- "query" (Object) set a query (can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION]) (use GOOGLE_QUERY items) | ```example below```
|
|
37
|
+
- "limit" (Int) maximum number of results to return
|
|
45
38
|
|
|
46
39
|
## Result
|
|
47
40
|
|
|
48
41
|
```js
|
|
49
|
-
}
|
|
50
42
|
{
|
|
51
|
-
url: 'https://images.google.com/search?tbm=isch&tbs=itp:clipart,qdr:y,ic:gray,isz:l,il:ol,ift:jpg&q=cats',
|
|
43
|
+
url: 'https://images.google.com/search?tbm=isch&tbs=itp:clipart,qdr:y,ic:gray,isz:l,il:ol,ift:jpg&q=cats%20%20%20-%22black%22%20-%22white%22&name=content&name2=content2',
|
|
52
44
|
result: [
|
|
53
45
|
{
|
|
54
|
-
url: 'https://media.
|
|
55
|
-
height: '
|
|
46
|
+
url: 'https://media.gettyimages.com/vectors/cat-eating-fish-vector-id1216628506',
|
|
47
|
+
height: '1024',
|
|
56
48
|
width: '1024'
|
|
57
49
|
},
|
|
58
50
|
{
|
|
59
|
-
url: 'https://
|
|
60
|
-
height: '
|
|
51
|
+
url: 'https://www.ariatrade.gr/images/products/2021/10/110294_1.jpg',
|
|
52
|
+
height: '768',
|
|
61
53
|
width: '1024'
|
|
62
54
|
},
|
|
63
55
|
{
|
|
64
|
-
url: 'https://media.
|
|
65
|
-
height: '
|
|
56
|
+
url: 'https://media.gettyimages.com/illustrations/panther-leaping-illustration-id152406879?s=2048x2048',
|
|
57
|
+
height: '2048',
|
|
66
58
|
width: '2048'
|
|
67
59
|
},
|
|
68
|
-
|
|
60
|
+
{
|
|
61
|
+
url: 'https://media.gettyimages.com/illustrations/botany-plants-antique-engraving-illustration-erythrina-variegata-illustration-id970781520',
|
|
62
|
+
height: '1024',
|
|
63
|
+
width: '828'
|
|
64
|
+
}
|
|
69
65
|
]
|
|
66
|
+
...
|
|
70
67
|
}
|
|
71
68
|
```
|
|
72
69
|
|
|
73
70
|
## How to use ?
|
|
74
71
|
|
|
75
|
-
- For the query parameter you need to set the name in upper case
|
|
72
|
+
- **For the query parameter you need to set the name in upper case !**
|
|
73
|
+
|
|
74
|
+
## Simple example
|
|
75
|
+
|
|
76
|
+
Search cats images
|
|
76
77
|
|
|
77
78
|
```js
|
|
78
|
-
|
|
79
|
+
(async function(){
|
|
80
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
81
|
+
search: "cats",
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
console.log(test);
|
|
85
|
+
})();
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Removing gstatic.com
|
|
89
|
+
|
|
90
|
+
```js
|
|
91
|
+
(async function(){
|
|
92
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
93
|
+
search: "demon slayer background hd",
|
|
94
|
+
execute: function(element){
|
|
95
|
+
if(!element.url.match('gstatic.com')) return element;
|
|
96
|
+
}
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
console.log(test);
|
|
100
|
+
})();
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Custom query
|
|
79
104
|
|
|
80
|
-
|
|
105
|
+
All query options are optional (see below for all the options)
|
|
81
106
|
|
|
107
|
+
```js
|
|
82
108
|
(async function(){
|
|
83
109
|
const test = await GOOGLE_IMG_SCRAP({
|
|
84
110
|
search: "cats",
|
|
@@ -90,50 +116,91 @@ console.log(GOOGLE_QUERY);
|
|
|
90
116
|
LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
|
|
91
117
|
EXTENSION: GOOGLE_QUERY.EXTENSION.JPG
|
|
92
118
|
},
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
console.log(test);
|
|
122
|
+
})();
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Limit result size
|
|
126
|
+
|
|
127
|
+
```js
|
|
128
|
+
(async function(){
|
|
129
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
130
|
+
search: "cats",
|
|
131
|
+
limit: 5,
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
console.log(test);
|
|
135
|
+
})();
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Domains
|
|
139
|
+
|
|
140
|
+
Only scrap from a specific domain
|
|
141
|
+
|
|
142
|
+
```js
|
|
143
|
+
(async function(){
|
|
144
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
145
|
+
search: "cats",
|
|
93
146
|
domains: ["alamy.com", "istockphoto.com", "vecteezy.com"],
|
|
94
|
-
excludeWords: ["black", "white"], //If you don't like black and white cats
|
|
95
|
-
custom: "name=content&name2=content2",
|
|
96
|
-
safeSearch: false,
|
|
97
|
-
// excludeDomains: ["istockphoto.com", "alamy.com"]
|
|
98
147
|
});
|
|
99
148
|
|
|
100
|
-
console.log(test
|
|
149
|
+
console.log(test);
|
|
101
150
|
})();
|
|
102
151
|
```
|
|
103
152
|
|
|
104
|
-
|
|
153
|
+
## Exclude domains
|
|
105
154
|
|
|
106
155
|
```js
|
|
107
|
-
|
|
156
|
+
(async function(){
|
|
157
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
158
|
+
search: "cats",
|
|
159
|
+
excludeDomains: ["istockphoto.com", "alamy.com"]
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
console.log(test);
|
|
163
|
+
})();
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Exclude words
|
|
108
167
|
|
|
168
|
+
If you don't like black cats and white cats
|
|
169
|
+
|
|
170
|
+
```js
|
|
109
171
|
(async function(){
|
|
110
172
|
const test = await GOOGLE_IMG_SCRAP({
|
|
111
173
|
search: "cats",
|
|
174
|
+
excludeWords: ["black", "white"], //If you don't like black cats and white cats
|
|
112
175
|
});
|
|
113
176
|
|
|
114
177
|
console.log(test, test.result.length);
|
|
115
178
|
})();
|
|
116
179
|
```
|
|
117
180
|
|
|
118
|
-
##
|
|
181
|
+
## Safe search (no nsfw)
|
|
119
182
|
|
|
120
183
|
```js
|
|
121
|
-
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
|
|
122
|
-
|
|
123
184
|
(async function(){
|
|
124
185
|
const test = await GOOGLE_IMG_SCRAP({
|
|
125
|
-
search: "
|
|
126
|
-
query: {
|
|
127
|
-
SIZE: GOOGLE_QUERY.SIZE.LARGE,
|
|
128
|
-
},
|
|
129
|
-
domains: ["alphacoders.com"],
|
|
186
|
+
search: "cats",
|
|
130
187
|
safeSearch: true,
|
|
131
|
-
execute: function(element){
|
|
132
|
-
if(!element.url.match('gstatic.com')) return element;
|
|
133
|
-
}
|
|
134
188
|
});
|
|
135
189
|
|
|
136
|
-
console.log(test
|
|
190
|
+
console.log(test);
|
|
191
|
+
})();
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Custom query params
|
|
195
|
+
|
|
196
|
+
```js
|
|
197
|
+
(async function(){
|
|
198
|
+
const test = await GOOGLE_IMG_SCRAP({
|
|
199
|
+
search: "cats",
|
|
200
|
+
custom: "name=content&name2=content2",
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
console.log(test);
|
|
137
204
|
})();
|
|
138
205
|
```
|
|
139
206
|
|
|
@@ -142,8 +209,6 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
|
|
|
142
209
|
- urlMatch works like filterByTitles
|
|
143
210
|
|
|
144
211
|
```js
|
|
145
|
-
const { GOOGLE_IMG_SCRAP } = require('google-img-scrap');
|
|
146
|
-
|
|
147
212
|
(async function(){
|
|
148
213
|
const test = await GOOGLE_IMG_SCRAP({
|
|
149
214
|
search: "cats",
|
|
@@ -152,12 +217,9 @@ const { GOOGLE_IMG_SCRAP } = require('google-img-scrap');
|
|
|
152
217
|
["draw", "white"],
|
|
153
218
|
["albino", "white"]
|
|
154
219
|
],
|
|
155
|
-
execute: function(element){
|
|
156
|
-
if(!element.url.match('gstatic.com')) return element;
|
|
157
|
-
}
|
|
158
220
|
});
|
|
159
221
|
|
|
160
|
-
console.log(test
|
|
222
|
+
console.log(test);
|
|
161
223
|
})();
|
|
162
224
|
```
|
|
163
225
|
|
|
@@ -212,4 +274,4 @@ const { GOOGLE_IMG_SCRAP } = require('google-img-scrap');
|
|
|
212
274
|
COMMERCIAL_AND_OTHER
|
|
213
275
|
}
|
|
214
276
|
}
|
|
215
|
-
```
|
|
277
|
+
```
|
package/package.json
CHANGED
|
@@ -1,41 +1,48 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "google-img-scrap",
|
|
3
|
-
"version": "1.0.
|
|
4
|
-
"description": "Scrap images from google images with
|
|
5
|
-
"main": "./src/google-img-scrap.js",
|
|
6
|
-
"
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "google-img-scrap",
|
|
3
|
+
"version": "1.0.7",
|
|
4
|
+
"description": "Scrap images from google images with customs pre filled options",
|
|
5
|
+
"main": "./src/google-img-scrap.js",
|
|
6
|
+
"types": "./types/index.d.ts",
|
|
7
|
+
"directories": {
|
|
8
|
+
"test": "test"
|
|
9
|
+
},
|
|
10
|
+
"scripts": {
|
|
11
|
+
"test": "node ./test/test.js"
|
|
12
|
+
},
|
|
13
|
+
"repository": {
|
|
14
|
+
"type": "git",
|
|
15
|
+
"url": "git+https://github.com/yoannchb-pro/google-img-scrap.git"
|
|
16
|
+
},
|
|
17
|
+
"keywords": [
|
|
18
|
+
"google",
|
|
19
|
+
"image",
|
|
20
|
+
"scrap",
|
|
21
|
+
"options",
|
|
22
|
+
"query",
|
|
23
|
+
"powerfull",
|
|
24
|
+
"easy",
|
|
25
|
+
"type",
|
|
26
|
+
"color",
|
|
27
|
+
"extension",
|
|
28
|
+
"filter",
|
|
29
|
+
"date",
|
|
30
|
+
"licence"
|
|
31
|
+
],
|
|
32
|
+
"author": "yoannchb",
|
|
33
|
+
"contributors": [
|
|
34
|
+
{
|
|
35
|
+
"name": "christophe77",
|
|
36
|
+
"url": "https://github.com/christophe77"
|
|
37
|
+
}
|
|
38
|
+
],
|
|
39
|
+
"license": "MIT",
|
|
40
|
+
"bugs": {
|
|
41
|
+
"url": "https://github.com/yoannchb-pro/google-img-scrap/issues"
|
|
42
|
+
},
|
|
43
|
+
"homepage": "https://github.com/yoannchb-pro/google-img-scrap#readme",
|
|
44
|
+
"dependencies": {
|
|
45
|
+
"fast-html-dom-parser": "^1.0.5",
|
|
46
|
+
"got": "^11.0.0"
|
|
47
|
+
}
|
|
48
|
+
}
|
package/src/google-img-scrap.js
CHANGED
|
@@ -1,150 +1,178 @@
|
|
|
1
|
-
const got = require(
|
|
2
|
-
const { FastHTMLParser } = require(
|
|
1
|
+
const got = require("got");
|
|
2
|
+
const { FastHTMLParser } = require("fast-html-dom-parser");
|
|
3
3
|
|
|
4
|
-
const { GOOGLE_CONSTANT } = require(
|
|
5
|
-
const { GOOGLE_QUERY } = require(
|
|
4
|
+
const { GOOGLE_CONSTANT } = require("./constant/GOOGLE_CONSTANT");
|
|
5
|
+
const { GOOGLE_QUERY } = require("./constant/query/GOOGLE_QUERY");
|
|
6
6
|
const { TRANSLATOR } = require("./constant/translator/TRANSLATOR");
|
|
7
|
-
const EXTENSIONS = require(
|
|
7
|
+
const EXTENSIONS = require("./constant/extensions/IMAGES_EXTENSIONS.json");
|
|
8
8
|
|
|
9
|
-
const { buildQuery, unicodeToChar } = require(
|
|
9
|
+
const { buildQuery, unicodeToChar } = require("./utils/UTILS");
|
|
10
10
|
|
|
11
11
|
//verify good configuration
|
|
12
|
-
function verify(config){
|
|
13
|
-
|
|
12
|
+
function verify(config) {
|
|
13
|
+
if (config.excludeDomains && config.domains)
|
|
14
|
+
throw "Can not set 'excludeDomains' and 'domains' as same times";
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
if (!config.search || config.search.trim() == "")
|
|
17
|
+
throw "'search' can not be empty";
|
|
16
18
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
for(const key of Object.keys(config.query)){
|
|
21
|
-
if(!queryToVerify.includes(key)) throw `Invalide query name '${key}'`;
|
|
19
|
+
if (config.query) {
|
|
20
|
+
const queryToVerify = Object.keys(GOOGLE_QUERY);
|
|
22
21
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
for (const key of Object.keys(config.query)) {
|
|
23
|
+
if (!queryToVerify.includes(key)) throw `Invalide query name '${key}'`;
|
|
24
|
+
|
|
25
|
+
const VALUES = Object.values(GOOGLE_QUERY[key]);
|
|
26
|
+
const ACTUAL_VALUE = config.query[key];
|
|
27
|
+
if (!VALUES.includes(ACTUAL_VALUE))
|
|
28
|
+
throw `'${ACTUAL_VALUE}' is not a valide argument for the query : '${key}'`;
|
|
27
29
|
}
|
|
28
|
-
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
29
32
|
|
|
30
33
|
//verify imag extension
|
|
31
|
-
function containImage(content = ""){
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
function containImage(content = "") {
|
|
35
|
+
for (const EXTENSION of EXTENSIONS) {
|
|
36
|
+
if (content.includes(EXTENSION)) return true;
|
|
37
|
+
}
|
|
35
38
|
|
|
36
|
-
|
|
39
|
+
return false;
|
|
37
40
|
}
|
|
38
41
|
|
|
39
42
|
//parse HTML
|
|
40
|
-
async function parse(url){
|
|
41
|
-
|
|
43
|
+
async function parse(url) {
|
|
44
|
+
const result = [];
|
|
42
45
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
46
|
+
const response = await got(url, {
|
|
47
|
+
headers: GOOGLE_CONSTANT.headers,
|
|
48
|
+
});
|
|
49
|
+
const parser = new FastHTMLParser(response.body);
|
|
47
50
|
|
|
48
|
-
|
|
51
|
+
const scripts = parser.getElementsByTagName("script");
|
|
49
52
|
|
|
50
|
-
|
|
53
|
+
if (!scripts) return result;
|
|
51
54
|
|
|
52
|
-
|
|
53
|
-
|
|
55
|
+
for (const script of scripts) {
|
|
56
|
+
const body = script.innerHTML;
|
|
54
57
|
|
|
55
|
-
|
|
58
|
+
const valide = containImage(body);
|
|
56
59
|
|
|
57
|
-
|
|
58
|
-
|
|
60
|
+
if (valide) {
|
|
61
|
+
const regex = /\["(http.+?)",(\d+),(\d+)\]/gi;
|
|
59
62
|
|
|
60
|
-
|
|
63
|
+
let res = null;
|
|
61
64
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
65
|
+
while ((res = regex.exec(body)) != null) {
|
|
66
|
+
if (res.length >= 4 && res[1].match(/http/gi).length < 2)
|
|
67
|
+
result.push({
|
|
68
|
+
url: unicodeToChar(res[1]),
|
|
69
|
+
height: res[2],
|
|
70
|
+
width: res[3],
|
|
71
|
+
});
|
|
72
|
+
}
|
|
70
73
|
}
|
|
74
|
+
}
|
|
71
75
|
|
|
72
|
-
|
|
73
|
-
}
|
|
76
|
+
return result;
|
|
77
|
+
}
|
|
74
78
|
|
|
75
79
|
//main
|
|
76
|
-
async function GOOGLE_IMG_SCRAP(config = {}){
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
80
|
+
async function GOOGLE_IMG_SCRAP(config = {}) {
|
|
81
|
+
//verify config
|
|
82
|
+
verify(config);
|
|
83
|
+
|
|
84
|
+
//exclude domains
|
|
85
|
+
const EXCLUDE_DOMAINS = [];
|
|
86
|
+
if (config.excludeDomains)
|
|
87
|
+
config.excludeDomains.forEach((domain) =>
|
|
88
|
+
EXCLUDE_DOMAINS.push(`-site:"${domain}"`)
|
|
89
|
+
);
|
|
90
|
+
|
|
91
|
+
//domains
|
|
92
|
+
const DOMAINS = [];
|
|
93
|
+
if (config.domains)
|
|
94
|
+
config.domains.forEach((domain) => DOMAINS.push(`site:"${domain}"`));
|
|
95
|
+
|
|
96
|
+
//exclude words
|
|
97
|
+
const EXCLUDE_WORDS = [];
|
|
98
|
+
if (config.excludeWords)
|
|
99
|
+
config.excludeWords.forEach((word) => EXCLUDE_WORDS.push(`-"${word}"`));
|
|
100
|
+
|
|
101
|
+
//filter by titles
|
|
102
|
+
const FILTER_TITLE = [];
|
|
103
|
+
if (config.filterByTitles)
|
|
104
|
+
config.filterByTitles.forEach((titleFilter) => {
|
|
105
|
+
const value = titleFilter.map((title) => {
|
|
106
|
+
return `intitle:"${title}"`;
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
FILTER_TITLE.push(`(${value.join(" AND ")})`);
|
|
102
110
|
});
|
|
103
111
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
URL_MATCH.push(`(${value.join(' AND ')})`);
|
|
113
|
-
|
|
114
|
-
});
|
|
112
|
+
//url match words
|
|
113
|
+
const URL_MATCH = [];
|
|
114
|
+
if (config.urlMatch)
|
|
115
|
+
config.urlMatch.forEach((urlMatch) => {
|
|
116
|
+
const value = urlMatch.map((content) => {
|
|
117
|
+
return `inurl:${content}`;
|
|
118
|
+
});
|
|
115
119
|
|
|
116
|
-
|
|
117
|
-
const SEARCH_TERM = config.search +
|
|
118
|
-
" " + URL_MATCH.join(" OR ") +
|
|
119
|
-
" " + FILTER_TITLE.join(" OR ") +
|
|
120
|
-
" " + EXCLUDE_WORDS.join(" ") +
|
|
121
|
-
" " + EXCLUDE_DOMAINS.join(" ") +
|
|
122
|
-
" " + DOMAINS.join(" OR ");
|
|
123
|
-
|
|
124
|
-
const SEARCH = encodeURIComponent(SEARCH_TERM.trim())
|
|
125
|
-
const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, {
|
|
126
|
-
[GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(','),
|
|
127
|
-
q: SEARCH,
|
|
120
|
+
URL_MATCH.push(`(${value.join(" AND ")})`);
|
|
128
121
|
});
|
|
129
122
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
123
|
+
//building url
|
|
124
|
+
const SEARCH_TERM =
|
|
125
|
+
config.search +
|
|
126
|
+
" " +
|
|
127
|
+
URL_MATCH.join(" OR ") +
|
|
128
|
+
" " +
|
|
129
|
+
FILTER_TITLE.join(" OR ") +
|
|
130
|
+
" " +
|
|
131
|
+
EXCLUDE_WORDS.join(" ") +
|
|
132
|
+
" " +
|
|
133
|
+
EXCLUDE_DOMAINS.join(" ") +
|
|
134
|
+
" " +
|
|
135
|
+
DOMAINS.join(" OR ");
|
|
136
|
+
|
|
137
|
+
const SEARCH = encodeURIComponent(SEARCH_TERM.trim());
|
|
138
|
+
const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, {
|
|
139
|
+
[GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(","),
|
|
140
|
+
q: SEARCH,
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
const CUSTOM_PARAM = config.custom ? `&${config.custom}` : "";
|
|
144
|
+
const SAFE_SEARCH = config.safeSearch ? `&safe=active` : "";
|
|
145
|
+
|
|
146
|
+
const URL =
|
|
147
|
+
GOOGLE_CONSTANT.url +
|
|
148
|
+
buildQuery(QUERY, TRANSLATOR) +
|
|
149
|
+
CUSTOM_PARAM +
|
|
150
|
+
SAFE_SEARCH;
|
|
151
|
+
|
|
152
|
+
//parsing
|
|
153
|
+
const result = await parse(URL);
|
|
154
|
+
|
|
155
|
+
//excute function
|
|
156
|
+
let finalResult = [];
|
|
157
|
+
if (config.execute)
|
|
158
|
+
result.forEach((element) => {
|
|
159
|
+
const value = config.execute(element);
|
|
160
|
+
if (value) finalResult.push(value);
|
|
143
161
|
});
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
};
|
|
162
|
+
else finalResult = result;
|
|
163
|
+
|
|
164
|
+
//limit result
|
|
165
|
+
let slicedResult = [];
|
|
166
|
+
const { limit } = config;
|
|
167
|
+
|
|
168
|
+
if (limit && limit > 0 && finalResult.length > limit) {
|
|
169
|
+
slicedResult = finalResult.slice(0, limit);
|
|
170
|
+
}
|
|
171
|
+
//result
|
|
172
|
+
return {
|
|
173
|
+
url: URL,
|
|
174
|
+
result: slicedResult.length > 0 ? slicedResult : finalResult,
|
|
175
|
+
};
|
|
176
|
+
}
|
|
149
177
|
|
|
150
|
-
module.exports = { GOOGLE_IMG_SCRAP
|
|
178
|
+
module.exports = { GOOGLE_IMG_SCRAP, GOOGLE_QUERY };
|
package/src/utils/UTILS.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
function buildQuery(query, translator){
|
|
2
|
-
|
|
2
|
+
const result = [];
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
const params = Object.keys(query);
|
|
5
|
+
const toTranslate = Object.keys(translator);
|
|
6
6
|
|
|
7
7
|
for(const param of params){
|
|
8
|
-
|
|
8
|
+
const queryName = param;
|
|
9
9
|
if(toTranslate.includes(param)) queryName = toTranslate[param];
|
|
10
10
|
|
|
11
11
|
result.push(`${queryName}=${query[param]}`);
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap");
|
|
2
|
+
|
|
3
|
+
(async function () {
|
|
4
|
+
const limit = 5;
|
|
5
|
+
const testNoLimit = await GOOGLE_IMG_SCRAP({
|
|
6
|
+
search: "cats",
|
|
7
|
+
execute: function (element) {
|
|
8
|
+
if (!element.url.match("gstatic.com")) return element;
|
|
9
|
+
},
|
|
10
|
+
});
|
|
11
|
+
const testLimit = await GOOGLE_IMG_SCRAP({
|
|
12
|
+
search: "cats",
|
|
13
|
+
limit,
|
|
14
|
+
execute: function (element) {
|
|
15
|
+
if (!element.url.match("gstatic.com")) return element;
|
|
16
|
+
},
|
|
17
|
+
});
|
|
18
|
+
try {
|
|
19
|
+
console.log(
|
|
20
|
+
`limit : ${limit}, testNoLimit length : ${testNoLimit.result.length}, testLimit length : ${testLimit.result.length}`
|
|
21
|
+
);
|
|
22
|
+
} catch (error) {
|
|
23
|
+
console.log(error);
|
|
24
|
+
}
|
|
25
|
+
})();
|
package/test/test.js
CHANGED
|
@@ -13,7 +13,6 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('../src/google-img-scrap');
|
|
|
13
13
|
LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
|
|
14
14
|
EXTENSION: GOOGLE_QUERY.EXTENSION.JPG
|
|
15
15
|
},
|
|
16
|
-
domains: ["alamy.com", "istockphoto.com", "vecteezy.com"],
|
|
17
16
|
excludeWords: ["black", "white"], //If you don't like black and white cats
|
|
18
17
|
custom: "name=content&name2=content2",
|
|
19
18
|
safeSearch: false,
|
package/types/index.d.ts
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
type Config = {
|
|
2
|
+
search: string;
|
|
3
|
+
limit?:number;
|
|
4
|
+
query?: {
|
|
5
|
+
TYPE?: string;
|
|
6
|
+
DATE?: string;
|
|
7
|
+
COLOR?: string;
|
|
8
|
+
SIZE?: string;
|
|
9
|
+
LICENCE?: string;
|
|
10
|
+
EXTENSION?: string;
|
|
11
|
+
};
|
|
12
|
+
domains?: string[];
|
|
13
|
+
excludeWords?: string[];
|
|
14
|
+
custom?: string;
|
|
15
|
+
safeSearch?: boolean;
|
|
16
|
+
excludeDomains?: string[];
|
|
17
|
+
execute?: (element: FinalResult) => FinalResult | undefined;
|
|
18
|
+
filterByTitles?: [string[]];
|
|
19
|
+
};
|
|
20
|
+
type FinalResult = {
|
|
21
|
+
url: string;
|
|
22
|
+
height: string;
|
|
23
|
+
width: string;
|
|
24
|
+
};
|
|
25
|
+
type Results = {
|
|
26
|
+
url: string;
|
|
27
|
+
result: FinalResult[];
|
|
28
|
+
};
|
|
29
|
+
type GoogleQuery = {
|
|
30
|
+
SIZE: {
|
|
31
|
+
LARGE: string;
|
|
32
|
+
MEDIUM: string;
|
|
33
|
+
ICON: string;
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
COLOR: {
|
|
37
|
+
BLACK_AND_WHITE: string;
|
|
38
|
+
TRANSPARENT: string;
|
|
39
|
+
RED: string;
|
|
40
|
+
BLUE: string;
|
|
41
|
+
PURPLE: string;
|
|
42
|
+
ORANGE: string;
|
|
43
|
+
YELLOW: string;
|
|
44
|
+
GREEN: string;
|
|
45
|
+
TEAL: string;
|
|
46
|
+
PINK: string;
|
|
47
|
+
WHITE: string;
|
|
48
|
+
GRAY: string;
|
|
49
|
+
BLACK: string;
|
|
50
|
+
BROWN: string;
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
TYPE: {
|
|
54
|
+
CLIPART: string;
|
|
55
|
+
DRAW: string;
|
|
56
|
+
GIF: string;
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
EXTENSION: {
|
|
60
|
+
JPG: "jpg";
|
|
61
|
+
GIF: "gif";
|
|
62
|
+
BMP: "bmp";
|
|
63
|
+
PNG: "png";
|
|
64
|
+
SVG: "svg";
|
|
65
|
+
WEBP: "webp";
|
|
66
|
+
ICO: "ico";
|
|
67
|
+
RAW: "raw";
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
DATE: {
|
|
71
|
+
DAY: string;
|
|
72
|
+
WEEK: string;
|
|
73
|
+
MONTH: string;
|
|
74
|
+
YEAR: string;
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
LICENCE: {
|
|
78
|
+
CREATIVE_COMMONS: string;
|
|
79
|
+
COMMERCIAL_AND_OTHER: string;
|
|
80
|
+
};
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* GOOGLE_IMG_SCRAP
|
|
85
|
+
*
|
|
86
|
+
* @param {Config} config
|
|
87
|
+
* @returns {Results}
|
|
88
|
+
*/
|
|
89
|
+
// The implementation is an `async` function, so the call yields a Promise that resolves to Results.
export declare function GOOGLE_IMG_SCRAP(config: Config): Promise<Results>;
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* GOOGLE_QUERY
|
|
93
|
+
*
|
|
94
|
+
* @returns {GoogleQuery}
|
|
95
|
+
*/
|
|
96
|
+
// Table of allowed filter values, grouped by SIZE, COLOR, TYPE, EXTENSION, DATE and LICENCE (see GoogleQuery).
export declare const GOOGLE_QUERY: GoogleQuery;
|