google-img-scrap 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ### v1.0.4
4
+
5
+ - New option ```urlMatch```. You can now get images whose URL matches a string (example: "cdn")
6
+ - New option ```filterByTitles```. Filter images by titles
7
+
3
8
  ### v1.0.3
4
9
  - New option ```execute```. allow you to execute a function to remove "gstatic.com" domains for example
5
10
 
package/README.md CHANGED
@@ -1,5 +1,6 @@
1
- # Google-img-scrap v1.0.3
2
- Scrap images from google image with lot of options
1
+ # Google-img-scrap v1.0.4
2
+
3
+ Scrape images from Google Images with a lot of tools and options.
3
4
 
4
5
  ## Update
5
6
 
@@ -38,7 +39,9 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('./src/google-img-scrap');
38
39
  - "excludeDomains" (Array of String) exclude some domains
39
40
  - "safeSearch" (Boolean) active safe search or not for nsfw for example
40
41
  - "custom" (String) add extra query
41
- - "query" (Object) set a query (can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION]) (use GOOGLE_QUERY items, you can see example behind)
42
+ - "urlMatch" (Array of Array) get images whose URL matches a string (example: "cdn") | ```example below```
43
+ - "filterByTitles" (Array of Array) filter images by titles | ```example below```
44
+ - "query" (Object) set a query (can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION]) (use GOOGLE_QUERY items) | ```example below```
42
45
 
43
46
  ## Result
44
47
 
@@ -87,11 +90,11 @@ console.log(GOOGLE_QUERY);
87
90
  LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER,
88
91
  EXTENSION: GOOGLE_QUERY.EXTENSION.JPG
89
92
  },
90
- domains: ["alamy.com", "istockphoto.com", "vecteezy.com", "gstatic.com"],
93
+ domains: ["alamy.com", "istockphoto.com", "vecteezy.com"],
91
94
  excludeWords: ["black", "white"], //If you don't like black and white cats
92
95
  custom: "name=content&name2=content2",
93
96
  safeSearch: false,
94
- // excludeDomains: ["gstatic.com", "istockphoto.com", "alamy.com"]
97
+ // excludeDomains: ["istockphoto.com", "alamy.com"]
95
98
  });
96
99
 
97
100
  console.log(test, test.result.length);
@@ -134,6 +137,30 @@ const { GOOGLE_IMG_SCRAP , GOOGLE_QUERY } = require('google-img-scrap');
134
137
  })();
135
138
  ```
136
139
 
140
+ ## How do urlMatch and filterByTitles work?
141
+
142
+ - urlMatch works like filterByTitles
143
+
144
+ ```js
145
+ const { GOOGLE_IMG_SCRAP } = require('google-img-scrap');
146
+
147
+ (async function(){
148
+ const test = await GOOGLE_IMG_SCRAP({
149
+ search: "cats",
150
+ //will build something like this "(draw and white) or (albino and white)"
151
+ filterByTitles: [
152
+ ["draw", "white"],
153
+ ["albino", "white"]
154
+ ],
155
+ execute: function(element){
156
+ if(!element.url.match('gstatic.com')) return element;
157
+ }
158
+ });
159
+
160
+ console.log(test, test.result.length);
161
+ })();
162
+ ```
163
+
137
164
  ## Google query
138
165
 
139
166
  ```js
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "google-img-scrap",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "Scrap images from google images with a lot of options",
5
5
  "main": "./src/google-img-scrap.js",
6
6
  "directories": {
@@ -89,8 +89,38 @@ async function GOOGLE_IMG_SCRAP(config = {}){
89
89
  const EXCLUDE_WORDS = [];
90
90
  if(config.excludeWords) config.excludeWords.forEach((word) => EXCLUDE_WORDS.push(`-"${word}"`));
91
91
 
92
+ //filter by titles
93
+ const FILTER_TITLE = [];
94
+ if(config.filterByTitles) config.filterByTitles.forEach((titleFilter) => {
95
+
96
+ const value = titleFilter.map((title) => {
97
+ return `intitle:"${title}"`;
98
+ });
99
+
100
+ FILTER_TITLE.push(`(${value.join(' AND ')})`);
101
+
102
+ });
103
+
104
+ //url match words
105
+ const URL_MATCH = [];
106
+ if(config.urlMatch) config.urlMatch.forEach((urlMatch) => {
107
+
108
+ const value = urlMatch.map((content) => {
109
+ return `inurl:${content}`;
110
+ });
111
+
112
+ URL_MATCH.push(`(${value.join(' AND ')})`);
113
+
114
+ });
115
+
92
116
  //building url
93
- const SEARCH_TERM = config.search + " " + EXCLUDE_WORDS.join(" ") + " " + EXCLUDE_DOMAINS.join(" ") + " " + DOMAINS.join(' OR ');
117
+ const SEARCH_TERM = config.search +
118
+ " " + URL_MATCH.join(" OR ") +
119
+ " " + FILTER_TITLE.join(" OR ") +
120
+ " " + EXCLUDE_WORDS.join(" ") +
121
+ " " + EXCLUDE_DOMAINS.join(" ") +
122
+ " " + DOMAINS.join(" OR ");
123
+
94
124
  const SEARCH = encodeURIComponent(SEARCH_TERM.trim())
95
125
  const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, {
96
126
  [GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(','),
@@ -0,0 +1,16 @@
1
+ const { GOOGLE_IMG_SCRAP } = require('../src/google-img-scrap');
2
+
3
+ (async function(){
4
+ const test = await GOOGLE_IMG_SCRAP({
5
+ search: "cats",
6
+ filterByTitles: [
7
+ ["draw", "white"],
8
+ ["albino", "white"]
9
+ ],
10
+ execute: function(element){
11
+ if(!element.url.match('gstatic.com')) return element;
12
+ }
13
+ });
14
+
15
+ console.log(test, test.result.length);
16
+ })();
@@ -0,0 +1,16 @@
1
+ const { GOOGLE_IMG_SCRAP } = require('../src/google-img-scrap');
2
+
3
+ (async function(){
4
+ const test = await GOOGLE_IMG_SCRAP({
5
+ search: "cats",
6
+ urlMatch: [
7
+ ["cdn"],
8
+ ["istockphoto"]
9
+ ],
10
+ execute: function(element){
11
+ if(!element.url.match('gstatic.com')) return element;
12
+ }
13
+ });
14
+
15
+ console.log(test, test.result.length);
16
+ })();