google-img-scrap 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,27 +1,39 @@
1
1
  # Changelog
2
2
 
3
+ ### 1.0.8
4
+
5
+ - Fixed "ERROR: Cannot assign to "queryName" because it is a constant" (by GaspardCulis)
6
+ - Removed gstatic url
7
+ - Added average color, id, title and originalUrl
8
+
3
9
  ### 1.0.7
10
+
4
11
  - Readme update
5
12
 
6
13
  ### 1.0.6
14
+
7
15
  - Fixed types
8
- - Added ```limit``` to limit the size of the results
16
+ - Added `limit` to limit the size of the results
9
17
 
10
18
  ### 1.0.5
19
+
11
20
  - Added types (by christophe77)
12
21
 
13
22
  ### v1.0.4
14
23
 
15
- - New option ```urlMatch```. You now get image when an url match a string (example: "cdn")
16
- - New option ```filterByTitles```. Filter images by titles
24
+ - New option `urlMatch`. You now get images when a URL matches a string (example: "cdn")
25
+ - New option `filterByTitles`. Filter images by titles
17
26
 
18
27
  ### v1.0.3
19
- - New option ```execute```. allow you to execute a function to remove "gstatic.com" domains for example
28
+
29
+ - New option `execute`. Allows you to execute a function to remove "gstatic.com" domains for example
20
30
 
21
31
  ### v1.0.2
32
+
22
33
  - Cannot set 'domains' and 'excludeDomains' at the same time
23
34
  - Fixed some bugs
24
- - New option ```excludeWords```
35
+ - New option `excludeWords`
25
36
 
26
37
  ### v1.0.1
27
- - Added the missing dependencie
38
+
39
+ - Added the missing dependency
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
- # Google-img-scrap v1.0.7
1
+ # Google-img-scrap
2
2
 
3
- Scrap images from google images with customs pre filled options
3
+ Scrap images from google images with customs pre filled dorking options
4
4
 
5
5
  ## Update
6
6
 
@@ -19,22 +19,22 @@ npm i google-img-scrap
19
19
  ## Import
20
20
 
21
21
  ```js
22
- const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
22
+ const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("google-img-scrap");
23
23
  ```
24
24
 
25
25
  ## Query Params
26
26
 
27
- - "search" (String) what you want to search
28
- - "execute" (Function) allow you to execute a function to remove "gstatic.com" domains for example
29
- - "excludeWords" (String[]) exclude some words from the search
30
- - "domains" (String[]) filter by domains
31
- - "excludeDomains" (String[]) exclude some domains
32
- - "safeSearch" (Boolean) active safe search or not for nsfw for example
33
- - "custom" (String) add extra query
34
- - "urlMatch" (String[][]) get image when an url match a string (example: "cdn") | ```example below```
35
- - "filterByTitles" (String[][]) filter images by titles | ```example below```
36
- - "query" (Object) set a query (can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION]) (use GOOGLE_QUERY items | ```example below```
37
- - "limit" (Int) to limit the size of the results
27
+ - "search" `string` what you want to search
28
+ - "execute" `(element: FinalResult) => FinalResult | undefined` allow you to execute a function to filter results
29
+ - "excludeWords" `string[]` exclude some words from the search
30
+ - "domains" `string[]` filter by domains
31
+ - "excludeDomains" `string[]` exclude some domains
32
+ - "safeSearch" `boolean` active safe search or not for nsfw for example
33
+ - "custom" `string` add extra query
34
+ - "urlMatch" `string[][]` get images when a URL matches a string (example: "cdn") | `example below`
35
+ - "filterByTitles" `string[][]` filter images by titles | `example below`
36
+ - "query" `GoogleQuery` set a query (can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION]) (use GOOGLE_QUERY items) | `example below`
37
+ - "limit" `number` to limit the size of the results
38
38
 
39
39
  ## Result
40
40
 
@@ -43,60 +43,60 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
43
43
  url: 'https://images.google.com/search?tbm=isch&tbs=itp:clipart,qdr:y,ic:gray,isz:l,il:ol,ift:jpg&q=cats%20%20%20-%22black%22%20-%22white%22&name=content&name2=content2',
44
44
  result: [
45
45
  {
46
+ id: "HA6fW6faerBfPM",
47
+ title: "CAT eating a fish",
48
+ originalUrl: "https://media.gettyimages.com/vectors/cat-article.html",
46
49
  url: 'https://media.gettyimages.com/vectors/cat-eating-fish-vector-id1216628506',
47
- height: '1024',
48
- width: '1024'
50
+ averageColor: "rgb(241, 25, 60)",
51
+ averageColorObject: { r: 241, g: 25, b: 60},
52
+ height: 1024,
53
+ width: 1024
49
54
  },
50
55
  {
56
+ id: "OPSfyUtrsrYUI",
57
+ title: "Cat",
58
+ originalUrl: "https://www.ariatrade.gr/images/products/2021/10/article.html",
51
59
  url: 'https://www.ariatrade.gr/images/products/2021/10/110294_1.jpg',
52
- height: '768',
53
- width: '1024'
60
+ averageColor: "rgb(201, 250, 65)",
61
+ averageColorObject: { r: 201, g: 250, b: 65},
62
+ height: 768,
63
+ width: 1024
54
64
  },
55
- {
56
- url: 'https://media.gettyimages.com/illustrations/panther-leaping-illustration-id152406879?s=2048x2048',
57
- height: '2048',
58
- width: '2048'
59
- },
60
- {
61
- url: 'https://media.gettyimages.com/illustrations/botany-plants-antique-engraving-illustration-erythrina-variegata-illustration-id970781520',
62
- height: '1024',
63
- width: '828'
64
- }
65
+ ...
65
66
  ]
66
- ...
67
67
  }
68
68
  ```
69
69
 
70
70
  ## How to use ?
71
71
 
72
- - **For the query parameter you need to set the name in upper case !**
72
+ **NOTE**: For the query parameter you need to set the name in upper case !
73
73
 
74
74
  ## Simple example
75
75
 
76
76
  Search cats images
77
77
 
78
78
  ```js
79
- (async function(){
80
- const test = await GOOGLE_IMG_SCRAP({
81
- search: "cats",
82
- });
79
+ (async function () {
80
+ const test = await GOOGLE_IMG_SCRAP({
81
+ search: "cats",
82
+ });
83
83
 
84
- console.log(test);
84
+ console.log(test);
85
85
  })();
86
86
  ```
87
87
 
88
- ## Removing gstatic.com
88
+ ## Filtering
89
89
 
90
90
  ```js
91
- (async function(){
92
- const test = await GOOGLE_IMG_SCRAP({
93
- search: "demon slayer background hd",
94
- execute: function(element){
95
- if(!element.url.match('gstatic.com')) return element;
96
- }
97
- });
98
-
99
- console.log(test);
91
+ (async function () {
92
+ const test = await GOOGLE_IMG_SCRAP({
93
+ search: "demon slayer background hd",
94
+ execute: function (element) {
95
+ if (element.url.length < 20) return element;
96
+ },
97
+ });
98
+
99
+ console.log(test);
100
100
  })();
101
101
  ```
102
102
 
@@ -105,33 +105,33 @@ Search cats images
105
105
  All query options are optional (see below for all the options)
106
106
 
107
107
  ```js
108
- (async function(){
109
- const test = await GOOGLE_IMG_SCRAP({
110
- search: "cats",
111
- query: {
112
- TYPE: GOOGLE_QUERY.TYPE.CLIPART,
113
- DATE: GOOGLE_QUERY.DATE.YEAR,
114
- COLOR: GOOGLE_QUERY.COLOR.BLACK_AND_WHITE,
115
- SIZE: GOOGLE_QUERY.SIZE.LARGE,
116
- LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
117
- EXTENSION: GOOGLE_QUERY.EXTENSION.JPG
118
- },
119
- });
120
-
121
- console.log(test);
108
+ (async function () {
109
+ const test = await GOOGLE_IMG_SCRAP({
110
+ search: "cats",
111
+ query: {
112
+ TYPE: GOOGLE_QUERY.TYPE.CLIPART,
113
+ DATE: GOOGLE_QUERY.DATE.YEAR,
114
+ COLOR: GOOGLE_QUERY.COLOR.BLACK_AND_WHITE,
115
+ SIZE: GOOGLE_QUERY.SIZE.LARGE,
116
+ LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
117
+ EXTENSION: GOOGLE_QUERY.EXTENSION.JPG,
118
+ },
119
+ });
120
+
121
+ console.log(test);
122
122
  })();
123
123
  ```
124
124
 
125
125
  ## Limit result size
126
126
 
127
127
  ```js
128
- (async function(){
129
- const test = await GOOGLE_IMG_SCRAP({
130
- search: "cats",
131
- limit: 5,
132
- });
128
+ (async function () {
129
+ const test = await GOOGLE_IMG_SCRAP({
130
+ search: "cats",
131
+ limit: 5,
132
+ });
133
133
 
134
- console.log(test);
134
+ console.log(test);
135
135
  })();
136
136
  ```
137
137
 
@@ -140,26 +140,26 @@ All query options are optional (see below for all the options)
140
140
  Only scrap from a specific domain
141
141
 
142
142
  ```js
143
- (async function(){
144
- const test = await GOOGLE_IMG_SCRAP({
145
- search: "cats",
146
- domains: ["alamy.com", "istockphoto.com", "vecteezy.com"],
147
- });
143
+ (async function () {
144
+ const test = await GOOGLE_IMG_SCRAP({
145
+ search: "cats",
146
+ domains: ["alamy.com", "istockphoto.com", "vecteezy.com"],
147
+ });
148
148
 
149
- console.log(test);
149
+ console.log(test);
150
150
  })();
151
151
  ```
152
152
 
153
153
  ## Exclude domains
154
154
 
155
155
  ```js
156
- (async function(){
157
- const test = await GOOGLE_IMG_SCRAP({
158
- search: "cats",
159
- excludeDomains: ["istockphoto.com", "alamy.com"]
160
- });
156
+ (async function () {
157
+ const test = await GOOGLE_IMG_SCRAP({
158
+ search: "cats",
159
+ excludeDomains: ["istockphoto.com", "alamy.com"],
160
+ });
161
161
 
162
- console.log(test);
162
+ console.log(test);
163
163
  })();
164
164
  ```
165
165
 
@@ -168,39 +168,39 @@ Only scrap from a specific domain
168
168
  If you don't like black cats and white cats
169
169
 
170
170
  ```js
171
- (async function(){
172
- const test = await GOOGLE_IMG_SCRAP({
173
- search: "cats",
174
- excludeWords: ["black", "white"], //If you don't like black cats and white cats
175
- });
171
+ (async function () {
172
+ const test = await GOOGLE_IMG_SCRAP({
173
+ search: "cats",
174
+ excludeWords: ["black", "white"], //If you don't like black cats and white cats
175
+ });
176
176
 
177
- console.log(test, test.result.length);
177
+ console.log(test);
178
178
  })();
179
179
  ```
180
180
 
181
181
  ## Safe search (no nsfw)
182
182
 
183
183
  ```js
184
- (async function(){
185
- const test = await GOOGLE_IMG_SCRAP({
186
- search: "cats",
187
- safeSearch: false,
188
- });
184
+ (async function () {
185
+ const test = await GOOGLE_IMG_SCRAP({
186
+ search: "cats",
187
+ safeSearch: false,
188
+ });
189
189
 
190
- console.log(test);
190
+ console.log(test);
191
191
  })();
192
192
  ```
193
193
 
194
194
  ## Custom query params
195
195
 
196
196
  ```js
197
- (async function(){
198
- const test = await GOOGLE_IMG_SCRAP({
199
- search: "cats",
200
- custom: "name=content&name2=content2",
201
- });
197
+ (async function () {
198
+ const test = await GOOGLE_IMG_SCRAP({
199
+ search: "cats",
200
+ custom: "name=content&name2=content2",
201
+ });
202
202
 
203
- console.log(test);
203
+ console.log(test);
204
204
  })();
205
205
  ```
206
206
 
@@ -209,17 +209,17 @@ If you don' like black cats and white cats
209
209
  - urlMatch works like filterByTitles
210
210
 
211
211
  ```js
212
- (async function(){
213
- const test = await GOOGLE_IMG_SCRAP({
214
- search: "cats",
215
- //will build something like this "(draw and white) or (albino and white)"
216
- filterByTitles: [
217
- ["draw", "white"],
218
- ["albino", "white"]
219
- ],
220
- });
221
-
222
- console.log(test);
212
+ (async function () {
213
+ const test = await GOOGLE_IMG_SCRAP({
214
+ search: "cats",
215
+ //will build something like this "(draw and white) or (albino and white)"
216
+ filterByTitles: [
217
+ ["draw", "white"],
218
+ ["albino", "white"],
219
+ ],
220
+ });
221
+
222
+ console.log(test);
223
223
  })();
224
224
  ```
225
225
 
@@ -227,10 +227,10 @@ If you don' like black cats and white cats
227
227
 
228
228
  ```js
229
229
  {
230
- SIZE: {
231
- LARGE,
232
- MEDIUM,
233
- ICON
230
+ SIZE: {
231
+ LARGE,
232
+ MEDIUM,
233
+ ICON
234
234
  },
235
235
  COLOR: {
236
236
  BLACK_AND_WHITE,
@@ -248,10 +248,10 @@ If you don' like black cats and white cats
248
248
  BLACK,
249
249
  BROWN
250
250
  },
251
- TYPE: {
252
- CLIPART,
253
- DRAW,
254
- GIF
251
+ TYPE: {
252
+ CLIPART,
253
+ DRAW,
254
+ GIF
255
255
  },
256
256
  EXTENSION: {
257
257
  JPG,
@@ -263,15 +263,15 @@ If you don' like black cats and white cats
263
263
  ICO,
264
264
  RAW
265
265
  },
266
- DATE: {
267
- DAY,
268
- WEEK,
269
- MONTH,
270
- YEAR
266
+ DATE: {
267
+ DAY,
268
+ WEEK,
269
+ MONTH,
270
+ YEAR
271
271
  },
272
- LICENCE: {
273
- CREATIVE_COMMONS,
274
- COMMERCIAL_AND_OTHER
272
+ LICENCE: {
273
+ CREATIVE_COMMONS,
274
+ COMMERCIAL_AND_OTHER
275
275
  }
276
276
  }
277
277
  ```
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "google-img-scrap",
3
- "version": "1.0.7",
4
- "description": "Scrap images from google images with customs pre filled options",
3
+ "version": "1.0.8",
4
+ "description": "Scrap images from google images with customs pre filled dorking options",
5
5
  "main": "./src/google-img-scrap.js",
6
6
  "types": "./types/index.d.ts",
7
7
  "directories": {
@@ -34,6 +34,10 @@
34
34
  {
35
35
  "name": "christophe77",
36
36
  "url": "https://github.com/christophe77"
37
+ },
38
+ {
39
+ "name": "GaspardCulis",
40
+ "url": "https://github.com/GaspardCulis"
37
41
  }
38
42
  ],
39
43
  "license": "MIT",
@@ -0,0 +1,184 @@
1
+ const got = require("got");
2
+ const { FastHTMLParser } = require("fast-html-dom-parser");
3
+
4
+ const { GOOGLE_CONSTANT } = require("../constant/GOOGLE_CONSTANT");
5
+ const { GOOGLE_QUERY } = require("../constant/query/GOOGLE_QUERY");
6
+ const EXTENSIONS = require("../constant/extensions/IMAGES_EXTENSIONS.json");
7
+
8
+ const { buildQuery, unicodeToChar } = require("../utils/UTILS");
9
+
10
+ /**
11
+ * Validation of the arguments passed
12
+ * @param {import("../../types").Config} config
13
+ */
14
+ function verify(config) {
15
+ if (config.excludeDomains && config.domains)
16
+ throw "Can not set 'excludeDomains' and 'domains' as same times";
17
+
18
+ if (!config.search || config.search.trim() == "")
19
+ throw "'search' can not be empty";
20
+
21
+ if (config.query) {
22
+ const queryToVerify = Object.keys(GOOGLE_QUERY);
23
+
24
+ for (const key of Object.keys(config.query)) {
25
+ if (!queryToVerify.includes(key)) throw `Invalide query name '${key}'`;
26
+
27
+ const VALUES = Object.values(GOOGLE_QUERY[key]);
28
+ const ACTUAL_VALUE = config.query[key];
29
+ if (!VALUES.includes(ACTUAL_VALUE))
30
+ throw `'${ACTUAL_VALUE}' is not a valide argument for the query : '${key}'`;
31
+ }
32
+ }
33
+ }
34
+
35
+ /**
36
+ * Verify the url is an image
37
+ * @param {string} content
38
+ * @returns {boolean}
39
+ */
40
+ function containImage(content = "") {
41
+ return EXTENSIONS.some((extension) => content.includes(extension));
42
+ }
43
+
44
+ /**
45
+ *Parse the html from google image to get the images links
46
+ * @param {string} url
47
+ * @returns {import("../../types").FinalResult[]}
48
+ */
49
+ async function parse(url) {
50
+ const result = [];
51
+
52
+ const response = await got(url, {
53
+ headers: GOOGLE_CONSTANT.headers,
54
+ });
55
+ const parser = new FastHTMLParser(response.body);
56
+
57
+ const scripts = parser.getElementsByTagName("script");
58
+
59
+ if (!scripts) return result;
60
+
61
+ for (const script of scripts) {
62
+ const body = script.innerHTML;
63
+
64
+ const valide = containImage(body);
65
+
66
+ if (valide) {
67
+ const regex = /\["(http.+?)",(\d+),(\d+)\]/gi;
68
+
69
+ let res = null;
70
+
71
+ while ((res = regex.exec(body)) != null) {
72
+ if (res.length >= 4 && res[1].match(/http/gi).length < 2)
73
+ result.push({
74
+ url: unicodeToChar(res[1]),
75
+ height: res[2],
76
+ width: res[3],
77
+ });
78
+ }
79
+ }
80
+ }
81
+
82
+ return result;
83
+ }
84
+
85
+ /**
86
+ * Main function to build google image dork URL
87
+ * @param {import("../../types").Config} config
88
+ * @returns {import("../../types").Results}
89
+ */
90
+ async function GOOGLE_IMG_SCRAP(config = {}) {
91
+ verify(config);
92
+
93
+ //exclude domains
94
+ const EXCLUDE_DOMAINS = [];
95
+ if (config.excludeDomains)
96
+ config.excludeDomains.forEach((domain) =>
97
+ EXCLUDE_DOMAINS.push(`-site:"${domain}"`)
98
+ );
99
+
100
+ //domains
101
+ const DOMAINS = [];
102
+ if (config.domains)
103
+ config.domains.forEach((domain) => DOMAINS.push(`site:"${domain}"`));
104
+
105
+ //exclude words
106
+ const EXCLUDE_WORDS = [];
107
+ if (config.excludeWords)
108
+ config.excludeWords.forEach((word) => EXCLUDE_WORDS.push(`-"${word}"`));
109
+
110
+ //filter by titles
111
+ const FILTER_TITLE = [];
112
+ if (config.filterByTitles)
113
+ config.filterByTitles.forEach((titleFilter) => {
114
+ const value = titleFilter.map((title) => {
115
+ return `intitle:"${title}"`;
116
+ });
117
+
118
+ FILTER_TITLE.push(`(${value.join(" AND ")})`);
119
+ });
120
+
121
+ //url match words
122
+ const URL_MATCH = [];
123
+ if (config.urlMatch)
124
+ config.urlMatch.forEach((urlMatch) => {
125
+ const value = urlMatch.map((content) => {
126
+ return `inurl:${content}`;
127
+ });
128
+
129
+ URL_MATCH.push(`(${value.join(" AND ")})`);
130
+ });
131
+
132
+ //building url
133
+ const SEARCH_TERM =
134
+ config.search +
135
+ " " +
136
+ URL_MATCH.join(" OR ") +
137
+ " " +
138
+ FILTER_TITLE.join(" OR ") +
139
+ " " +
140
+ EXCLUDE_WORDS.join(" ") +
141
+ " " +
142
+ EXCLUDE_DOMAINS.join(" ") +
143
+ " " +
144
+ DOMAINS.join(" OR ");
145
+
146
+ const SEARCH = encodeURIComponent(SEARCH_TERM.trim());
147
+ const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, {
148
+ [GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(","),
149
+ q: SEARCH,
150
+ });
151
+
152
+ const CUSTOM_PARAM = config.custom ? `&${config.custom}` : "";
153
+ const SAFE_SEARCH = config.safeSearch ? `&safe=active` : "";
154
+
155
+ const URL =
156
+ GOOGLE_CONSTANT.url + buildQuery(QUERY) + CUSTOM_PARAM + SAFE_SEARCH;
157
+
158
+ //parsing
159
+ const result = await parse(URL);
160
+
161
+ //execute function
162
+ let finalResult = [];
163
+ if (config.execute)
164
+ result.forEach((element) => {
165
+ const value = config.execute(element);
166
+ if (value) finalResult.push(value);
167
+ });
168
+ else finalResult = result;
169
+
170
+ //limit result
171
+ let slicedResult = [];
172
+ const { limit } = config;
173
+
174
+ if (limit && limit > 0 && finalResult.length > limit) {
175
+ slicedResult = finalResult.slice(0, limit);
176
+ }
177
+ //result
178
+ return {
179
+ url: URL,
180
+ result: slicedResult.length > 0 ? slicedResult : finalResult,
181
+ };
182
+ }
183
+
184
+ module.exports = { GOOGLE_IMG_SCRAP, GOOGLE_QUERY };
@@ -1,13 +1,13 @@
1
1
  const GOOGLE_CONSTANT = {
2
- url: "https://images.google.com/search",
3
- queryParam: "tbs",
4
- forceGoogleImage: {
5
- tbm: "isch" //needed to search on google image instead of google
6
- },
7
- headers: {
8
- 'User-Agent':
9
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
10
- }
2
+ url: "https://images.google.com/search",
3
+ queryParam: "tbs",
4
+ forceGoogleImage: {
5
+ tbm: "isch", //needed to search on google image instead of google
6
+ },
7
+ headers: {
8
+ "User-Agent":
9
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
10
+ },
11
11
  };
12
12
 
13
- module.exports = { GOOGLE_CONSTANT };
13
+ module.exports = { GOOGLE_CONSTANT };
@@ -1 +1 @@
1
- ["jpg", "gif", "bmp", "png", "svg", "webp", "ico", "raw"]
1
+ ["jpg", "gif", "bmp", "png", "svg", "webp", "ico", "raw"]
@@ -1,14 +1,16 @@
1
- const COLORS = ["red",
2
- "blue",
3
- "purple",
4
- "orange",
5
- "yellow",
6
- "green",
7
- "teal",
8
- "pink",
9
- "white",
10
- "gray",
11
- "black",
12
- "brown"];
1
+ const COLORS = [
2
+ "red",
3
+ "blue",
4
+ "purple",
5
+ "orange",
6
+ "yellow",
7
+ "green",
8
+ "teal",
9
+ "pink",
10
+ "white",
11
+ "gray",
12
+ "black",
13
+ "brown",
14
+ ];
13
15
 
14
- module.exports = { COLORS };
16
+ module.exports = { COLORS };
@@ -1,9 +1,17 @@
1
1
  const SIZE_PARAM = "isz",
2
- COLOR_PARAM = "ic",
3
- SPECIFIC_COLOR_PARAM = "Cisc",
4
- TYPE_PARAM = "itp",
5
- DATE_PARAM = "qdr",
6
- LICENCE_PARAM = "il",
7
- IMAGE_EXTENSION_PARAM = "ift";
2
+ COLOR_PARAM = "ic",
3
+ SPECIFIC_COLOR_PARAM = "Cisc",
4
+ TYPE_PARAM = "itp",
5
+ DATE_PARAM = "qdr",
6
+ LICENCE_PARAM = "il",
7
+ IMAGE_EXTENSION_PARAM = "ift";
8
8
 
9
- module.exports = { SIZE_PARAM, COLOR_PARAM, SPECIFIC_COLOR_PARAM, TYPE_PARAM, DATE_PARAM, LICENCE_PARAM, IMAGE_EXTENSION_PARAM };
9
+ module.exports = {
10
+ SIZE_PARAM,
11
+ COLOR_PARAM,
12
+ SPECIFIC_COLOR_PARAM,
13
+ TYPE_PARAM,
14
+ DATE_PARAM,
15
+ LICENCE_PARAM,
16
+ IMAGE_EXTENSION_PARAM,
17
+ };
@@ -1,50 +1,59 @@
1
- const { SIZE_PARAM, COLOR_PARAM, SPECIFIC_COLOR_PARAM, TYPE_PARAM, DATE_PARAM, LICENCE_PARAM, IMAGE_EXTENSION_PARAM } = require('./GOOGLE_PARAMS');
2
- const { COLORS } = require('./GOOGLE_COLORS');
1
+ const {
2
+ SIZE_PARAM,
3
+ COLOR_PARAM,
4
+ SPECIFIC_COLOR_PARAM,
5
+ TYPE_PARAM,
6
+ DATE_PARAM,
7
+ LICENCE_PARAM,
8
+ IMAGE_EXTENSION_PARAM,
9
+ } = require("./GOOGLE_PARAMS");
10
+ const { COLORS } = require("./GOOGLE_COLORS");
3
11
  const EXTENSIONS = require("../extensions/IMAGES_EXTENSIONS.json");
4
12
 
5
13
  const GOOGLE_QUERY = {
6
- SIZE: {
7
- LARGE: SIZE_PARAM + ":l",
8
- MEDIUM: SIZE_PARAM + ":m",
9
- ICON: SIZE_PARAM + ":i",
10
- },
11
-
12
- COLOR: {
13
- BLACK_AND_WHITE: COLOR_PARAM + ":gray",
14
- TRANSPARENT: COLOR_PARAM + ":trans",
15
- },
16
-
17
- TYPE: {
18
- CLIPART: TYPE_PARAM + ":clipart",
19
- DRAW: TYPE_PARAM + ":lineart",
20
- GIF: TYPE_PARAM + ":animated",
21
- },
22
-
23
- EXTENSION: {},
24
-
25
- DATE: {
26
- DAY: DATE_PARAM + ":d",
27
- WEEK: DATE_PARAM + ":w",
28
- MONTH: DATE_PARAM + ":m",
29
- YEAR: DATE_PARAM + ":y",
30
- },
31
-
32
- LICENCE: {
33
- CREATIVE_COMMONS: LICENCE_PARAM + ":cl",
34
- COMMERCIAL_AND_OTHER: LICENCE_PARAM + ":ol",
35
- },
14
+ SIZE: {
15
+ LARGE: SIZE_PARAM + ":l",
16
+ MEDIUM: SIZE_PARAM + ":m",
17
+ ICON: SIZE_PARAM + ":i",
18
+ },
19
+
20
+ COLOR: {
21
+ BLACK_AND_WHITE: COLOR_PARAM + ":gray",
22
+ TRANSPARENT: COLOR_PARAM + ":trans",
23
+ },
24
+
25
+ TYPE: {
26
+ CLIPART: TYPE_PARAM + ":clipart",
27
+ DRAW: TYPE_PARAM + ":lineart",
28
+ GIF: TYPE_PARAM + ":animated",
29
+ },
30
+
31
+ EXTENSION: {},
32
+
33
+ DATE: {
34
+ DAY: DATE_PARAM + ":d",
35
+ WEEK: DATE_PARAM + ":w",
36
+ MONTH: DATE_PARAM + ":m",
37
+ YEAR: DATE_PARAM + ":y",
38
+ },
39
+
40
+ LICENCE: {
41
+ CREATIVE_COMMONS: LICENCE_PARAM + ":cl",
42
+ COMMERCIAL_AND_OTHER: LICENCE_PARAM + ":ol",
43
+ },
36
44
  };
37
45
 
38
46
  //build extension
39
47
  EXTENSIONS.forEach((EXTENSION) => {
40
- const queryName = EXTENSION.toUpperCase();
41
- GOOGLE_QUERY.EXTENSION[queryName] = IMAGE_EXTENSION_PARAM + ":" + EXTENSION;
48
+ const queryName = EXTENSION.toUpperCase();
49
+ GOOGLE_QUERY.EXTENSION[queryName] = IMAGE_EXTENSION_PARAM + ":" + EXTENSION;
42
50
  });
43
51
 
44
52
  //build colors
45
53
  COLORS.forEach((COLOR) => {
46
- const queryName = COLOR.toUpperCase();
47
- GOOGLE_QUERY.COLOR[queryName] = COLOR_PARAM + ":specific," + SPECIFIC_COLOR_PARAM + ":" + COLOR;
54
+ const queryName = COLOR.toUpperCase();
55
+ GOOGLE_QUERY.COLOR[queryName] =
56
+ COLOR_PARAM + ":specific," + SPECIFIC_COLOR_PARAM + ":" + COLOR;
48
57
  });
49
58
 
50
- module.exports = { GOOGLE_QUERY };
59
+ module.exports = { GOOGLE_QUERY };
@@ -3,12 +3,14 @@ const { FastHTMLParser } = require("fast-html-dom-parser");
3
3
 
4
4
  const { GOOGLE_CONSTANT } = require("./constant/GOOGLE_CONSTANT");
5
5
  const { GOOGLE_QUERY } = require("./constant/query/GOOGLE_QUERY");
6
- const { TRANSLATOR } = require("./constant/translator/TRANSLATOR");
7
6
  const EXTENSIONS = require("./constant/extensions/IMAGES_EXTENSIONS.json");
8
7
 
9
8
  const { buildQuery, unicodeToChar } = require("./utils/UTILS");
10
9
 
11
- //verify good configuration
10
+ /**
11
+ * Validation of the arguments passed
12
+ * @param {import("../types").Config} config
13
+ */
12
14
  function verify(config) {
13
15
  if (config.excludeDomains && config.domains)
14
16
  throw "Can not set 'excludeDomains' and 'domains' as same times";
@@ -30,16 +32,20 @@ function verify(config) {
30
32
  }
31
33
  }
32
34
 
33
- //verify imag extension
35
+ /**
36
+ * Verify the url is an image
37
+ * @param {string} content
38
+ * @returns {boolean}
39
+ */
34
40
  function containImage(content = "") {
35
- for (const EXTENSION of EXTENSIONS) {
36
- if (content.includes(EXTENSION)) return true;
37
- }
38
-
39
- return false;
41
+ return EXTENSIONS.some((extension) => content.includes(extension));
40
42
  }
41
43
 
42
- //parse HTML
44
+ /**
45
+ *Parse the html from google image to get the images links
46
+ * @param {string} url
47
+ * @returns {import("../types").FinalResult[]}
48
+ */
43
49
  async function parse(url) {
44
50
  const result = [];
45
51
 
@@ -58,17 +64,45 @@ async function parse(url) {
58
64
  const valide = containImage(body);
59
65
 
60
66
  if (valide) {
61
- const regex = /\["(http.+?)",(\d+),(\d+)\]/gi;
67
+ //getting image url, height, width, average
68
+ const regex =
69
+ /\["(http[^"]+?)",(\d+),(\d+)\],[\w\d]+?,[\w\d]+?,"rgb\((\d+),(\d+),(\d+)\)"/gi;
70
+
71
+ //getting originalUrl, title, id
72
+ const secondRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi;
62
73
 
63
74
  let res = null;
75
+ let secondRes = null;
76
+
77
+ while (
78
+ (res = regex.exec(body)) != null &&
79
+ (secondRes = secondRegex.exec(body)) != null
80
+ ) {
81
+ if (
82
+ res.length >= 4 &&
83
+ res[1].match(/http/gi).length < 2 &&
84
+ secondRes.length === 4 &&
85
+ secondRes[2].match(/http/gi).length < 2
86
+ ) {
87
+ const [r, g, b] = [res[4], res[5], res[6]].map((e) =>
88
+ parseInt(e, 10)
89
+ );
64
90
 
65
- while ((res = regex.exec(body)) != null) {
66
- if (res.length >= 4 && res[1].match(/http/gi).length < 2)
67
91
  result.push({
92
+ id: secondRes[1],
93
+ title: secondRes[3],
68
94
  url: unicodeToChar(res[1]),
69
- height: res[2],
70
- width: res[3],
95
+ originalUrl: unicodeToChar(secondRes[2]),
96
+ averageColor: `rgb(${r}, ${g}, ${b})`,
97
+ averageColorObject: {
98
+ r,
99
+ g,
100
+ b,
101
+ },
102
+ height: parseInt(res[2], 10),
103
+ width: parseInt(res[3], 10),
71
104
  });
105
+ }
72
106
  }
73
107
  }
74
108
  }
@@ -76,9 +110,12 @@ async function parse(url) {
76
110
  return result;
77
111
  }
78
112
 
79
- //main
113
+ /**
114
+ * Main function to build google image dork URL
115
+ * @param {import("../types").Config} config
116
+ * @returns {import("../types").Results}
117
+ */
80
118
  async function GOOGLE_IMG_SCRAP(config = {}) {
81
- //verify config
82
119
  verify(config);
83
120
 
84
121
  //exclude domains
@@ -144,10 +181,7 @@ async function GOOGLE_IMG_SCRAP(config = {}) {
144
181
  const SAFE_SEARCH = config.safeSearch ? `&safe=active` : "";
145
182
 
146
183
  const URL =
147
- GOOGLE_CONSTANT.url +
148
- buildQuery(QUERY, TRANSLATOR) +
149
- CUSTOM_PARAM +
150
- SAFE_SEARCH;
184
+ GOOGLE_CONSTANT.url + buildQuery(QUERY) + CUSTOM_PARAM + SAFE_SEARCH;
151
185
 
152
186
  //parsing
153
187
  const result = await parse(URL);
@@ -1,24 +1,20 @@
1
- function buildQuery(query, translator){
2
- const result = [];
1
+ function buildQuery(query) {
2
+ const result = [];
3
3
 
4
- const params = Object.keys(query);
5
- const toTranslate = Object.keys(translator);
4
+ const params = Object.keys(query);
6
5
 
7
- for(const param of params){
8
- const queryName = param;
9
- if(toTranslate.includes(param)) queryName = toTranslate[param];
6
+ for (const param of params) {
7
+ const queryName = param;
8
+ result.push(`${queryName}=${query[param]}`);
9
+ }
10
10
 
11
- result.push(`${queryName}=${query[param]}`);
12
- }
13
-
14
- return "?" + result.join('&');
11
+ return "?" + result.join("&");
15
12
  }
16
13
 
17
14
  function unicodeToChar(text) {
18
- return text.replace(/\\u[\dA-F]{4}/gi,
19
- function (match) {
20
- return String.fromCharCode(parseInt(match.replace(/\\u/g, ''), 16));
21
- });
22
- }
15
+ return text.replace(/\\u[\dA-F]{4}/gi, function (match) {
16
+ return String.fromCharCode(parseInt(match.replace(/\\u/g, ""), 16));
17
+ });
18
+ }
23
19
 
24
- module.exports = { buildQuery, unicodeToChar };
20
+ module.exports = { buildQuery, unicodeToChar };
@@ -1,16 +1,16 @@
1
- const { GOOGLE_IMG_SCRAP } = require('../src/google-img-scrap');
1
+ const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap");
2
2
 
3
- (async function(){
4
- const test = await GOOGLE_IMG_SCRAP({
5
- search: "cats",
6
- filterByTitles: [
7
- ["draw", "white"],
8
- ["albino", "white"]
9
- ],
10
- execute: function(element){
11
- if(!element.url.match('gstatic.com')) return element;
12
- }
13
- });
3
+ (async function () {
4
+ const test = await GOOGLE_IMG_SCRAP({
5
+ search: "cats",
6
+ filterByTitles: [
7
+ ["draw", "white"],
8
+ ["albino", "white"],
9
+ ],
10
+ execute: function (element) {
11
+ if (!element.url.match("gstatic.com")) return element;
12
+ },
13
+ });
14
14
 
15
- console.log(test, test.result.length);
16
- })();
15
+ console.log(test, test.result.length);
16
+ })();
@@ -0,0 +1,19 @@
1
+ const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap");
2
+ const V107 = require("../src/back/google-img-scrap-1.0.7-.js");
3
+
4
+ // console.log(GOOGLE_QUERY);
5
+
6
+ (async function () {
7
+ const test = await GOOGLE_IMG_SCRAP({
8
+ search: "cats",
9
+ });
10
+
11
+ const test2 = await V107.GOOGLE_IMG_SCRAP({
12
+ search: "cats",
13
+ execute: function (element) {
14
+ if (!element.url.match("gstatic.com")) return element;
15
+ },
16
+ });
17
+
18
+ console.log(test.result.length, test2.result.length);
19
+ })();
@@ -0,0 +1,11 @@
1
+ const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap");
2
+
3
+ // console.log(GOOGLE_QUERY);
4
+
5
+ (async function () {
6
+ const test = await GOOGLE_IMG_SCRAP({
7
+ search: "cats",
8
+ });
9
+
10
+ console.log(test, test.result.length);
11
+ })();
@@ -1,16 +1,13 @@
1
- const { GOOGLE_IMG_SCRAP } = require('../src/google-img-scrap');
1
+ const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap");
2
2
 
3
- (async function(){
4
- const test = await GOOGLE_IMG_SCRAP({
5
- search: "cats",
6
- urlMatch: [
7
- ["cdn"],
8
- ["istockphoto"]
9
- ],
10
- execute: function(element){
11
- if(!element.url.match('gstatic.com')) return element;
12
- }
13
- });
3
+ (async function () {
4
+ const test = await GOOGLE_IMG_SCRAP({
5
+ search: "cats",
6
+ urlMatch: [["cdn"], ["istockphoto"]],
7
+ execute: function (element) {
8
+ if (!element.url.match("gstatic.com")) return element;
9
+ },
10
+ });
14
11
 
15
- console.log(test, test.result.length);
16
- })();
12
+ console.log(test, test.result.length);
13
+ })();
@@ -1,17 +1,21 @@
1
- const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('../src/google-img-scrap');
1
+ const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap");
2
2
 
3
- (async function(){
4
- const test = await GOOGLE_IMG_SCRAP({
5
- search: "demon slayer background hd",
6
- query: {
7
- SIZE: GOOGLE_QUERY.SIZE.LARGE,
8
- },
9
- domains: ["alphacoders.com"],
10
- safeSearch: false,
11
- execute: function(element){
12
- if(!element.url.match('gstatic.com')) return element;
13
- }
14
- });
3
+ (async function () {
4
+ const test = await GOOGLE_IMG_SCRAP({
5
+ search: "demon slayer background hd",
6
+ query: {
7
+ SIZE: GOOGLE_QUERY.SIZE.LARGE,
8
+ },
9
+ domains: ["alphacoders.com"],
10
+ safeSearch: false,
11
+ execute: function (element) {
12
+ if (!element.url.match("gstatic.com")) return element;
13
+ },
14
+ });
15
15
 
16
- console.log(test, test.result[test.result.length-1].url, test.result.length);
17
- })();
16
+ console.log(
17
+ test,
18
+ test.result[test.result.length - 1].url,
19
+ test.result.length
20
+ );
21
+ })();
package/test/test.js CHANGED
@@ -1,26 +1,26 @@
1
- const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('../src/google-img-scrap');
1
+ const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap");
2
2
 
3
3
  // console.log(GOOGLE_QUERY);
4
4
 
5
- (async function(){
6
- const test = await GOOGLE_IMG_SCRAP({
7
- search: "cats",
8
- query: {
9
- TYPE: GOOGLE_QUERY.TYPE.CLIPART,
10
- DATE: GOOGLE_QUERY.DATE.YEAR,
11
- COLOR: GOOGLE_QUERY.COLOR.BLACK_AND_WHITE,
12
- SIZE: GOOGLE_QUERY.SIZE.LARGE,
13
- LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
14
- EXTENSION: GOOGLE_QUERY.EXTENSION.JPG
15
- },
16
- excludeWords: ["black", "white"], //If you don't like black and white cats
17
- custom: "name=content&name2=content2",
18
- safeSearch: false,
19
- execute: function(element){
20
- if(!element.url.match('gstatic.com')) return element;
21
- }
22
- // excludeDomains: ["istockphoto.com", "alamy.com"]
23
- });
5
+ (async function () {
6
+ const test = await GOOGLE_IMG_SCRAP({
7
+ search: "cats",
8
+ query: {
9
+ TYPE: GOOGLE_QUERY.TYPE.CLIPART,
10
+ DATE: GOOGLE_QUERY.DATE.YEAR,
11
+ COLOR: GOOGLE_QUERY.COLOR.BLACK_AND_WHITE,
12
+ SIZE: GOOGLE_QUERY.SIZE.LARGE,
13
+ LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
14
+ EXTENSION: GOOGLE_QUERY.EXTENSION.JPG,
15
+ },
16
+ excludeWords: ["black", "white"], //If you don't like black and white cats
17
+ custom: "name=content&name2=content2",
18
+ safeSearch: false,
19
+ execute: function (element) {
20
+ if (!element.url.match("gstatic.com")) return element;
21
+ },
22
+ // excludeDomains: ["istockphoto.com", "alamy.com"]
23
+ });
24
24
 
25
- console.log(test, test.result.length);
26
- })();
25
+ console.log(test, test.result.length);
26
+ })();
package/types/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  type Config = {
2
2
  search: string;
3
- limit?:number;
3
+ limit?: number;
4
4
  query?: {
5
5
  TYPE?: string;
6
6
  DATE?: string;
@@ -17,15 +17,27 @@ type Config = {
17
17
  execute?: (element: FinalResult) => FinalResult | undefined;
18
18
  filterByTitles?: [string[]];
19
19
  };
20
+
20
21
  type FinalResult = {
22
+ id: string;
23
+ title: string;
24
+ originalUrl: string;
21
25
  url: string;
22
- height: string;
23
- width: string;
26
+ averageColor: string;
27
+ averageColorObject: {
28
+ r: number;
29
+ g: number;
30
+ b: number;
31
+ };
32
+ height: number;
33
+ width: number;
24
34
  };
35
+
25
36
  type Results = {
26
37
  url: string;
27
38
  result: FinalResult[];
28
39
  };
40
+
29
41
  type GoogleQuery = {
30
42
  SIZE: {
31
43
  LARGE: string;
@@ -1,3 +0,0 @@
1
- const TRANSLATOR = {};
2
-
3
- module.exports = { TRANSLATOR };