google-img-scrap 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -19
- package/package.json +1 -1
- package/src/google-img-scrap.js +10 -2
- package/test/test.js +4 -3
package/README.md
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
|
-
# Google-img-scrap v1.0.1
|
|
1
|
+
# Google-img-scrap v1.0.2
|
|
2
2
|
Scrape images from Google Images with lots of options
|
|
3
3
|
|
|
4
4
|
## Update
|
|
5
5
|
|
|
6
6
|
- Added the missing dependency
|
|
7
|
+
- Cannot set 'domains' and 'excludeDomains' at the same time
|
|
8
|
+
- Fixed some bugs
|
|
9
|
+
- New option ```excludeWords```
|
|
7
10
|
|
|
8
11
|
## Found a bug ?
|
|
9
12
|
|
|
@@ -17,21 +20,53 @@ npm i google-img-scrap
|
|
|
17
20
|
|
|
18
21
|
## Import
|
|
19
22
|
|
|
23
|
+
- NPM
|
|
24
|
+
|
|
25
|
+
```js
|
|
26
|
+
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
- From GITHUB
|
|
30
|
+
|
|
20
31
|
```js
|
|
21
|
-
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('
|
|
32
|
+
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('./src/google-img-scrap');
|
|
22
33
|
```
|
|
23
34
|
|
|
35
|
+
## Params
|
|
36
|
+
|
|
37
|
+
- "search" (String) what you want to search
|
|
38
|
+
- "excludeWords" (Array of String) exclude some words from the search
|
|
39
|
+
- "domains" (Array of String) filter by domains
|
|
40
|
+
- "excludeDomains" (Array of String) exclude some domains
|
|
41
|
+
- "safeSearch" (Boolean) enable or disable safe search (for example, to filter out NSFW results)
|
|
42
|
+
- "custom" (String) extra query-string parameters appended to the request URL (e.g. "name=content&name2=content2")
|
|
43
|
+
- "query" (Object) set a query (keys can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION]) (use GOOGLE_QUERY items as values; see the example below)
|
|
44
|
+
|
|
24
45
|
## Result
|
|
25
46
|
|
|
26
47
|
```js
|
|
27
|
-
|
|
48
|
+
}
|
|
49
|
+
{
|
|
50
|
+
url: 'https://images.google.com/search?tbm=isch&tbs=itp:clipart,qdr:y,ic:gray,isz:l,il:ol,ift:jpg&q=cats',
|
|
51
|
+
result: [
|
|
52
|
+
{
|
|
53
|
+
url: 'https://media.istockphoto.com/vectors/black-cats-set-vector-id599123506',
|
|
54
|
+
height: '806',
|
|
55
|
+
width: '1024'
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
url: 'https://media.istockphoto.com/vectors/cats-vector-id455327075',
|
|
59
|
+
height: '860',
|
|
60
|
+
width: '1024'
|
|
61
|
+
},
|
|
28
62
|
{
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
63
|
+
url: 'https://media.istockphoto.com/vectors/purring-cats-vector-silhouette-vector-id165749810?s=2048x2048',
|
|
64
|
+
height: '1895',
|
|
65
|
+
width: '2048'
|
|
32
66
|
},
|
|
33
67
|
...
|
|
34
|
-
]
|
|
68
|
+
]
|
|
69
|
+
}
|
|
35
70
|
```
|
|
36
71
|
|
|
37
72
|
## How to use ?
|
|
@@ -39,7 +74,9 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('../src/google-img-scrap');
|
|
|
39
74
|
- For the query parameter, the key names must be written in upper case!
|
|
40
75
|
|
|
41
76
|
```js
|
|
42
|
-
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('
|
|
77
|
+
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
|
|
78
|
+
|
|
79
|
+
console.log(GOOGLE_QUERY);
|
|
43
80
|
|
|
44
81
|
(async function(){
|
|
45
82
|
const test = await GOOGLE_IMG_SCRAP({
|
|
@@ -52,10 +89,11 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('../src/google-img-scrap');
|
|
|
52
89
|
LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
|
|
53
90
|
EXTENSION: GOOGLE_QUERY.EXTENSION.JPG
|
|
54
91
|
},
|
|
55
|
-
domains: [],
|
|
92
|
+
domains: ["alamy.com", "istockphoto.com", "vecteezy.com", "gstatic.com"],
|
|
93
|
+
excludeWords: ["black", "white"], //If you don't like black and white cats
|
|
56
94
|
custom: "name=content&name2=content2",
|
|
57
95
|
safeSearch: false,
|
|
58
|
-
excludeDomains: []
|
|
96
|
+
// excludeDomains: ["gstatic.com", "istockphoto.com", "alamy.com"]
|
|
59
97
|
});
|
|
60
98
|
|
|
61
99
|
console.log(test, test.result.length);
|
|
@@ -65,7 +103,7 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('../src/google-img-scrap');
|
|
|
65
103
|
OR ALSO
|
|
66
104
|
|
|
67
105
|
```js
|
|
68
|
-
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('
|
|
106
|
+
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
|
|
69
107
|
|
|
70
108
|
(async function(){
|
|
71
109
|
const test = await GOOGLE_IMG_SCRAP({
|
|
@@ -75,14 +113,6 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('../src/google-img-scrap');
|
|
|
75
113
|
console.log(test, test.result.length);
|
|
76
114
|
})();
|
|
77
115
|
```
|
|
78
|
-
## Params
|
|
79
|
-
|
|
80
|
-
- "search" what you want to search
|
|
81
|
-
- "domains" filter by domains
|
|
82
|
-
- "excludeDomains" exclude some domains
|
|
83
|
-
- "safeSearch" active safe search or not
|
|
84
|
-
- "custom" add extra query
|
|
85
|
-
- "query" set a query (can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION])
|
|
86
116
|
|
|
87
117
|
## Google query
|
|
88
118
|
|
package/package.json
CHANGED
package/src/google-img-scrap.js
CHANGED
|
@@ -10,6 +10,8 @@ const { buildQuery, unicodeToChar } = require('./utils/UTILS');
|
|
|
10
10
|
|
|
11
11
|
//verify good configuration
|
|
12
12
|
function verify(config){
|
|
13
|
+
if(config.excludeDomains && config.domains) throw "Can not set 'excludeDomains' and 'domains' as same times";
|
|
14
|
+
|
|
13
15
|
if(!config.search || config.search.trim() == "") throw "'search' can not be empty";
|
|
14
16
|
|
|
15
17
|
if(config.query){
|
|
@@ -77,16 +79,22 @@ async function GOOGLE_IMG_SCRAP(config = {}){
|
|
|
77
79
|
|
|
78
80
|
//exclude domains
|
|
79
81
|
const EXCLUDE_DOMAINS = [];
|
|
80
|
-
if(config.excludeDomains) config.excludeDomains.forEach((domain) => EXCLUDE_DOMAINS.push(`-site
|
|
82
|
+
if(config.excludeDomains) config.excludeDomains.forEach((domain) => EXCLUDE_DOMAINS.push(`-site:"${domain}"`));
|
|
81
83
|
|
|
82
84
|
//domains
|
|
83
85
|
const DOMAINS = [];
|
|
84
86
|
if(config.domains) config.domains.forEach((domain) => DOMAINS.push(`site:"${domain}"`));
|
|
85
87
|
|
|
88
|
+
//exclude words
|
|
89
|
+
const EXCLUDE_WORDS = [];
|
|
90
|
+
if(config.excludeWords) config.excludeWords.forEach((word) => EXCLUDE_WORDS.push(`-"${word}"`));
|
|
91
|
+
|
|
86
92
|
//building url
|
|
93
|
+
const SEARCH_TERM = config.search + " " + EXCLUDE_WORDS.join(" ") + " " + EXCLUDE_DOMAINS.join(" ") + " " + DOMAINS.join(' OR ');
|
|
94
|
+
const SEARCH = encodeURIComponent(SEARCH_TERM.trim())
|
|
87
95
|
const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, {
|
|
88
96
|
[GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(','),
|
|
89
|
-
q:
|
|
97
|
+
q: SEARCH,
|
|
90
98
|
});
|
|
91
99
|
|
|
92
100
|
const CUSTOM_PARAM = config.custom ? `&${config.custom}` : "";
|
package/test/test.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('../src/google-img-scrap');
|
|
2
2
|
|
|
3
|
-
console.log(GOOGLE_QUERY);
|
|
3
|
+
// console.log(GOOGLE_QUERY);
|
|
4
4
|
|
|
5
5
|
(async function(){
|
|
6
6
|
const test = await GOOGLE_IMG_SCRAP({
|
|
@@ -13,10 +13,11 @@ console.log(GOOGLE_QUERY);
|
|
|
13
13
|
LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
|
|
14
14
|
EXTENSION: GOOGLE_QUERY.EXTENSION.JPG
|
|
15
15
|
},
|
|
16
|
-
domains: [],
|
|
16
|
+
domains: ["alamy.com", "istockphoto.com", "vecteezy.com", "gstatic.com"],
|
|
17
|
+
excludeWords: ["black", "white"], //If you don't like black and white cats
|
|
17
18
|
custom: "name=content&name2=content2",
|
|
18
19
|
safeSearch: false,
|
|
19
|
-
excludeDomains: []
|
|
20
|
+
// excludeDomains: ["gstatic.com", "istockphoto.com", "alamy.com"]
|
|
20
21
|
});
|
|
21
22
|
|
|
22
23
|
console.log(test, test.result.length);
|