tomosia_icon8_crawl 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -21
- data/lib/tomosia_icon8_crawl.rb +133 -135
- data/lib/tomosia_icon8_crawl/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 255338f89b797ffceeb21f5d1a894a66c935b6d751df2638aab16c6c19c26036
|
4
|
+
data.tar.gz: 2021299e9056f9beae3448440904929205a11b93b66ff67553ee60ed26dcf07b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3108b2c8bb8ead660702486b5544489909096053fe4bb107f8bc7e5b5b368499d024a29bbc02c987a2f2ed19f9e14746840bf0a9e97f37780040a2cc4a4101e5
|
7
|
+
data.tar.gz: cf59adedf520a0ac3f0206b40bfecfb11fb3fa3f2c51e2a4438060b2564e8d658e67c9d0bb666f3a7a92be9a6bf50abfc3ad58e5bf924b28672dedeaf8189c31
|
data/README.md
CHANGED
@@ -14,49 +14,49 @@ gem 'tomosia_icon8_crawl'
|
|
14
14
|
|
15
15
|
And then execute:
|
16
16
|
|
17
|
-
|
17
|
+
$ bundle install
|
18
18
|
|
19
19
|
Or install it yourself as:
|
20
20
|
|
21
|
-
|
21
|
+
$ gem install tomosia_icon8_crawl
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
25
|
### Commandline or Terminal:
|
26
26
|
|
27
|
-
|
27
|
+
Download every image, across all result pages, that icons8.com returns for your search keyword
|
28
28
|
|
29
|
-
|
29
|
+
$ tomosia_icon8_crawl crawl "keyword" --destination="storage path"
|
30
30
|
|
31
|
-
|
31
|
+
Download only the number of images you specify from icons8.com for your search keyword
|
32
32
|
|
33
|
-
|
33
|
+
$ tomosia_icon8_crawl crawl "keyword" --destination="storage path" --max=number
|
34
34
|
|
35
35
|
### Developers:
|
36
36
|
|
37
|
-
|
37
|
+
Gemfile:
|
38
38
|
|
39
|
-
|
39
|
+
$ gem 'tomosia_icon8_crawl'
|
40
40
|
|
41
|
-
|
41
|
+
Require:
|
42
42
|
|
43
|
-
|
43
|
+
$ require 'tomosia_icon8_crawl'
|
44
44
|
|
45
|
-
|
45
|
+
Download every image, across all result pages, that icons8.com returns for your search keyword
|
46
46
|
|
47
|
-
|
47
|
+
$ TomosiaIcon8Crawl::CrawlIcon8.crawl("keyword", "destination")
|
48
48
|
|
49
|
-
|
49
|
+
Download only the number of images you specify from icons8.com for your search keyword
|
50
50
|
|
51
|
-
|
51
|
+
$ TomosiaIcon8Crawl::CrawlIcon8.crawl("keyword", "destination", max)
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
53
|
+
Help:
|
54
|
+
- keyword is the word used for searching.
|
55
|
+
ex: corona, car, virus,...
|
56
|
+
- destination is the path of the directory where the downloaded images are saved.
|
57
|
+
ex: E:\download, C:\download, C:\Desktop,...
|
58
|
+
- max is the number of images you want to download.
|
59
|
+
ex: 100, 10, 5, 1000,...
|
60
60
|
|
61
61
|
|
62
62
|
|
data/lib/tomosia_icon8_crawl.rb
CHANGED
@@ -1,152 +1,150 @@
|
|
1
1
|
module TomosiaIcon8Crawl
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
2
|
+
require 'open-uri'
|
3
|
+
require "httparty"
|
4
|
+
require 'pry'
|
5
|
+
require 'writeexcel'
|
6
|
+
class CrawlIcon8
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
8
|
+
# get data from website
|
9
|
+
# Queries the icons8 search API and stores the parsed response in @responses.
#
# @param key [String, nil] search keyword; when nil no request is made
# @param max [Integer, String, nil] number of icons to request; when nil a
#   one-icon probe request is sent first and its parameters.countAll value
#   is used as the amount
# @return [Object, nil] the parsed response body (also kept in @responses),
#   or nil when no keyword was given
def self.json(key, max = nil)
  if key.nil?
    p "No data!"
    # Drop any earlier result and stop here rather than calling HTTParty.get(nil).
    @responses = nil
    return nil
  end

  # Escape the keyword so spaces, '&', '#', ... cannot break the query string.
  # URI is already loaded by the file's `require 'open-uri'`.
  base = 'https://search.icons8.com/api/iconsets/v5/search?term=' +
         URI.encode_www_form_component(key.to_s)

  amount = max
  if amount.nil?
    probe = HTTParty.get(base + '&amount=1').parsed_response
    amount = probe['parameters']['countAll']
  end

  # to_s: max may be given as a number, and String#+ rejects an Integer.
  @responses = HTTParty.get(base + '&amount=' + amount.to_s).parsed_response
end
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
28
|
+
# save file to txt
|
29
|
+
# Appends one line describing an image to a plain-text log file.
#
# @param index [Object] position shown at the start of the line
# @param name [Object] image name
# @param url [Object] image URL
# @param size [Object] image size, printed with a "Kb" suffix
#   (NOTE(review): the unit is whatever the caller passes — confirm it is Kb)
# @param extension [Object] file extension
# @param log_path [String] file to append to; defaults to "log_image.txt"
#   in the current working directory, as before
# @return [Integer] number of bytes written
def self.save_file_txt(index, name, url, size, extension, log_path = "log_image.txt")
  line = "#{index}. name: #{name} | url: #{url} | size: #{size}Kb | extension: #{extension} \n"
  # Append-only mode is sufficient: the log is never read back here.
  File.open(log_path, "a") { |f| f.write(line) }
end
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
35
|
+
# save file to excel
|
36
|
+
# Writes the collected image rows to "<path>/export.xls".
#
# Row 0 holds the column titles; each entry of +data+ fills the next row.
# Rows are written sequentially: the original started one thread per row
# against a single shared worksheet and rewrote every cell once per hash key.
#
# @param path [String] directory that receives export.xls
# @param data [Enumerable<Hash>] rows with the string keys "index", "name",
#   "url", "size" and "extension"
# @return [Object] result of closing the workbook, or the rescued error
def self.save_file_excel(path, data = {})
  begin
    workbook = WriteExcel.new(File.join(path, 'export.xls'))

    header = workbook.add_format
    header.set_bold()
    header.set_align('center')

    data_col = workbook.add_format
    data_col.set_align('center')

    format_url = workbook.add_format
    format_url.set_color('blue')
    format_url.set_align('center')

    worksheet = workbook.add_worksheet

    ['STT', 'NAME', 'URL', 'SIZE(byte)', 'EXTENSION'].each_with_index do |title, col|
      worksheet.write_string(0, col, title, header)
    end

    (data || []).each_with_index do |row, index|
      line = index + 1 # row 0 is the header
      # index and size are not strings (size may be nil), so stringify them.
      worksheet.write_string(line, 0, row['index'].to_s, data_col)
      worksheet.write_string(line, 1, row['name'].to_s, data_col)
      worksheet.write_url(line, 2, row['url'], format_url)
      worksheet.write_string(line, 3, row['size'].to_s, data_col)
      worksheet.write_string(line, 4, row['extension'].to_s, data_col)
    end

    workbook.close
  rescue StandardError => e
    # StandardError only: interrupts and exit requests must not be swallowed.
    p "Can't saved file"
    p e
  end
end
|
84
83
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
end
|
84
|
+
# download image
|
85
|
+
# Downloads one image to +path+, retrying up to five times on failure.
#
# Uses URI.open (open-uri) instead of Kernel#open: Kernel#open no longer
# opens URLs on Ruby 3.0+.
#
# @param path [String] destination file
# @param img [String] image URL
# @return [Integer, nil] number of bytes saved (also stored in @size), or nil
#   when every attempt failed
def self.download_image(path, img)
  attempts = 0
  begin
    URI.open(img) do |image|
      bytes = image.read
      # Read first, then create the file, so a failed read leaves no empty file.
      File.open(path, 'wb') { |file| file.write(bytes) }
      @size = bytes.bytesize
    end
  rescue StandardError
    if attempts < 5
      attempts += 1
      p "Retry download image"
      retry
    end
    # Do not leave the size of an earlier download behind.
    @size = nil
  end
end
|
104
102
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
103
|
+
# multi download image
|
104
|
+
# Downloads every image concurrently (one thread per image) and records one
# row per image in @data.
#
# Each thread keeps the size returned by its own download_image call. The
# original read the shared @size afterwards, so a row could receive the size
# of another thread's image. Rows are collected in input order.
#
# @param path [String] destination directory
# @param imgs [Array<String>] image URLs
# @return [Array<Hash>, Object] the rows (also stored in @data), each with the
#   keys "index", "name", "url", "size", "extension"; the rescued error on failure
def self.multi_download_image(path, imgs)
  begin
    @data = []

    workers = imgs.each_with_index.map do |img, index|
      title = File.basename(img, '.png')
      des = File.join(path, "#{index}-#{title}.png")
      ext = File.extname(img).delete('.')

      Thread.new do
        size = download_image(des, img)
        { "index" => index, "name" => title, "url" => img, "size" => size, "extension" => ext }
      end
    end

    # Thread#value joins the thread and returns the row built in its block.
    workers.each { |worker| @data.push(worker.value) }
    @data
  rescue StandardError => e
    # StandardError only: interrupts and exit requests must not be swallowed.
    p "no data"
    p e
  end
end
|
128
126
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
127
|
+
# main
|
128
|
+
# Entry point: searches icons8 for +keyword+, downloads the matching icons
# into +path+ and writes an export.xls summary there.
#
# @param keyword [String, nil] search keyword
# @param path [String] destination directory (default: current directory)
# @param max [Integer, String, nil] number of icons to fetch; nil fetches all
# @return [Object] unspecified; errors are printed rather than raised
def self.crawl(keyword = nil, path = ".", max = nil)
  begin
    json(keyword, max)

    icons = @responses && @responses['icons']
    if icons.nil?
      # No icon list in the response: there is nothing to download or export.
      p "no data"
      return
    end

    images = icons.map do |item|
      "https://img.icons8.com/#{item['platform']}/2x/#{item['commonName']}.png"
    end

    multi_download_image(path, images)
    save_file_excel(path, @data)
  rescue StandardError => e
    # StandardError only: interrupts and exit requests must not be swallowed.
    p "--Runtime error--"
    p e
  end
end
|
149
|
+
end
|
152
150
|
end
|