tomosia_icon8_crawl 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -21
- data/lib/tomosia_icon8_crawl.rb +133 -135
- data/lib/tomosia_icon8_crawl/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 255338f89b797ffceeb21f5d1a894a66c935b6d751df2638aab16c6c19c26036
|
4
|
+
data.tar.gz: 2021299e9056f9beae3448440904929205a11b93b66ff67553ee60ed26dcf07b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3108b2c8bb8ead660702486b5544489909096053fe4bb107f8bc7e5b5b368499d024a29bbc02c987a2f2ed19f9e14746840bf0a9e97f37780040a2cc4a4101e5
|
7
|
+
data.tar.gz: cf59adedf520a0ac3f0206b40bfecfb11fb3fa3f2c51e2a4438060b2564e8d658e67c9d0bb666f3a7a92be9a6bf50abfc3ad58e5bf924b28672dedeaf8189c31
|
data/README.md
CHANGED
@@ -14,49 +14,49 @@ gem 'tomosia_icon8_crawl'
|
|
14
14
|
|
15
15
|
And then execute:
|
16
16
|
|
17
|
-
|
17
|
+
$ bundle install
|
18
18
|
|
19
19
|
Or install it yourself as:
|
20
20
|
|
21
|
-
|
21
|
+
$ gem install tomosia_icon8_crawl
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
25
|
### Commandline or Terminal:
|
26
26
|
|
27
|
-
|
27
|
+
Download every icon, across all result pages, that icons8.com returns for your search keyword
|
28
28
|
|
29
|
-
|
29
|
+
$ tomosia_icon8_crawl crawl "keyword" --destination="storage path"
|
30
30
|
|
31
|
-
|
31
|
+
Download at most a given number of icons from icons8.com for your search keyword
|
32
32
|
|
33
|
-
|
33
|
+
$ tomosia_icon8_crawl crawl "keyword" --destination="storage path" --max=number
|
34
34
|
|
35
35
|
### Developers:
|
36
36
|
|
37
|
-
|
37
|
+
Gemfile:
|
38
38
|
|
39
|
-
|
39
|
+
$ gem 'tomosia_icon8_crawl'
|
40
40
|
|
41
|
-
|
41
|
+
Require:
|
42
42
|
|
43
|
-
|
43
|
+
$ require 'tomosia_icon8_crawl'
|
44
44
|
|
45
|
-
|
45
|
+
Download every icon, across all result pages, that icons8.com returns for your search keyword
|
46
46
|
|
47
|
-
|
47
|
+
$ TomosiaIcon8Crawl::CrawlIcon8.crawl("keyword", "destination")
|
48
48
|
|
49
|
-
|
49
|
+
Download at most a given number of icons from icons8.com for your search keyword
|
50
50
|
|
51
|
-
|
51
|
+
$ TomosiaIcon8Crawl::CrawlIcon8.crawl("keyword", "destination", max)
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
53
|
+
Help:
|
54
|
+
- keyword is the word used for searching.
|
55
|
+
ex: corona, car, virus,...
|
56
|
+
- destination is the path of the directory where the downloaded images will be saved.
|
57
|
+
ex: E:\download, C:\download, C:\Desktop,...
|
58
|
+
- max is the number of images you want to download.
|
59
|
+
ex: 100, 10, 5, 1000,...
|
60
60
|
|
61
61
|
|
62
62
|
|
data/lib/tomosia_icon8_crawl.rb
CHANGED
@@ -1,152 +1,150 @@
|
|
1
1
|
module TomosiaIcon8Crawl
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
2
|
+
require 'open-uri'
|
3
|
+
require "httparty"
|
4
|
+
require 'pry'
|
5
|
+
require 'writeexcel'
|
6
|
+
class CrawlIcon8
  # Endpoint of the icons8 search API queried by .json.
  SEARCH_ENDPOINT = 'https://search.icons8.com/api/iconsets/v5/search'
  # Extra attempts .download_image makes after the first failed one.
  MAX_DOWNLOAD_RETRIES = 5
  # Upper bound on download threads running at the same time.
  MAX_PARALLEL_DOWNLOADS = 16

  # Queries the icons8 search API and stores the parsed reply in @responses.
  #
  # When +max+ is nil a first request with amount=1 is made only to read
  # parameters.countAll, and a second request then asks for that many icons.
  #
  # @param key [String, nil] search keyword; nil prints "No data!" and skips
  #   every request
  # @param max [Integer, String, nil] number of icons to ask for
  # @return [Object, nil] HTTParty's parsed response, or nil when key is nil
  def self.json(key, max = nil)
    @responses = nil
    if key.nil?
      p "No data!"
      return nil
    end

    if max.nil?
      probe = HTTParty.get(search_url(key, 1)).parsed_response
      total = probe.is_a?(Hash) ? probe.dig('parameters', 'countAll') : nil
      # Without a total there is nothing better to ask for, so keep the
      # probe reply instead of sending a request with an empty amount.
      return @responses = probe if total.nil?

      max = total
    end

    @responses = HTTParty.get(search_url(key, max)).parsed_response
  end

  # Builds the search URL. Both values go through URI.encode_www_form, so
  # keywords containing spaces or reserved characters stay valid and an
  # Integer amount no longer breaks string concatenation.
  def self.search_url(term, amount)
    "#{SEARCH_ENDPOINT}?#{URI.encode_www_form(term: term, amount: amount)}"
  end
  private_class_method :search_url

  # Appends one line describing an image to log_image.txt in the current
  # working directory.
  #
  # @param index [Object] value printed before the entry
  # @param name [Object] image name
  # @param url [Object] image URL
  # @param size [Object] printed followed by the literal suffix "Kb"
  # @param extension [Object] file extension
  def self.save_file_txt(index, name, url, size, extension)
    File.open("log_image.txt", "a+") do |f|
      f.write("#{index}. name: #{name} | url: #{url} | size: #{size}Kb | extension: #{extension} \n")
    end
  end

  # Writes the collected rows to "<path>/export.xls": a bold header line
  # followed by one spreadsheet row per entry of +data+.
  #
  # Cells are written sequentially from the calling thread, and every row is
  # written exactly once.
  #
  # @param path [String] directory that receives export.xls
  # @param data [Array<Hash>] rows with the string keys "index", "name",
  #   "url", "size" and "extension", as built by .multi_download_image
  # @return [Object] whatever WriteExcel#close returns; on failure the error
  #   is printed instead of raised
  def self.save_file_excel(path, data = {})
    workbook = WriteExcel.new(File.join(path, 'export.xls'))

    header = workbook.add_format
    header.set_bold
    header.set_align('center')

    cell = workbook.add_format
    cell.set_align('center')

    link = workbook.add_format
    link.set_color('blue')
    link.set_align('center')

    sheet = workbook.add_worksheet
    ['STT', 'NAME', 'URL', 'SIZE(byte)', 'EXTENSION'].each_with_index do |title, column|
      sheet.write_string(0, column, title, header)
    end

    rows = data.respond_to?(:each_with_index) ? data : []
    rows.each_with_index do |row, position|
      line = position + 1 # line 0 holds the header
      # to_s: "index" and "size" are Integers (or nil after a failed download)
      sheet.write_string(line, 0, row['index'].to_s, cell)
      sheet.write_string(line, 1, row['name'].to_s, cell)
      sheet.write_url(line, 2, row['url'], link)
      sheet.write_string(line, 3, row['size'].to_s, cell)
      sheet.write_string(line, 4, row['extension'].to_s, cell)
    end

    workbook.close
  rescue StandardError => e
    p "Can't saved file"
    p e
  end

  # Downloads +img+ and stores it at +path+.
  #
  # URI.open is used instead of Kernel#open: Kernel#open stopped handling
  # URLs in Ruby 3.0 and would treat a string starting with "|" as a command.
  # A failed attempt is retried up to MAX_DOWNLOAD_RETRIES times.
  #
  # @param path [String] destination file
  # @param img [String] URL of the image
  # @return [Integer, nil] number of bytes written, or nil when every
  #   attempt failed
  def self.download_image(path, img)
    failures = 0
    begin
      URI.open(img) { |remote| File.binwrite(path, remote.read) }
    rescue StandardError
      if failures < MAX_DOWNLOAD_RETRIES
        failures += 1
        p "Retry download image"
        retry
      end
      # Report the give-up explicitly instead of returning silently.
      p "Can't download image: #{img}"
      nil
    end
  end

  # Downloads all +imgs+ into +path+ as "<index>-<name>.png", using at most
  # MAX_PARALLEL_DOWNLOADS threads at a time, and records one row per image
  # in @data.
  #
  # Each row takes its size from the return value of its own
  # .download_image call, so concurrent downloads cannot mix up sizes.
  # Rows are kept in the order of +imgs+.
  #
  # @param path [String] destination directory
  # @param imgs [Array<String>] image URLs
  # @return [Array<Hash>] the rows, also available as @data
  def self.multi_download_image(path, imgs)
    @data = []
    imgs.each_with_index.each_slice(MAX_PARALLEL_DOWNLOADS) do |batch|
      workers = batch.map do |img, index|
        Thread.new do
          title = File.basename(img, '.png')
          target = File.join(path, "#{index}-#{title}.png")
          {
            "index" => index,
            "name" => title,
            "url" => img,
            "size" => download_image(target, img),
            "extension" => File.extname(img).delete('.')
          }
        end
      end
      # Thread#value joins the thread and hands back the row it built.
      @data.concat(workers.map(&:value))
    end
    @data
  rescue StandardError => e
    p "no data"
    p e
    @data
  end

  # Entry point: searches icons8 for +keyword+, downloads the "2x" PNG of
  # every icon in the reply into +path+ and writes export.xls next to them.
  #
  # Only StandardError is rescued, so Ctrl-C and exit still stop the run.
  #
  # @param keyword [String, nil] search keyword; nil only prints "No data!"
  # @param path [String] destination directory
  # @param max [Integer, String, nil] maximum number of icons; nil requests
  #   the total reported by the API
  def self.crawl(keyword = nil, path = ".", max = nil)
    responses = json(keyword, max)
    return if responses.nil?

    icons = responses.is_a?(Hash) ? Array(responses['icons']) : []
    images = icons.map do |item|
      "https://img.icons8.com/#{item['platform']}/2x/#{item['commonName']}.png"
    end

    multi_download_image(path, images)
    save_file_excel(path, @data)
  rescue StandardError => e
    p "--Runtime error--"
    p e
  end
end
|
152
150
|
end
|