tomosia_icons8_crawl 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -1
- data/Gemfile.lock +1 -1
- data/README.md +21 -21
- data/lib/tomosia_icons8_crawl.rb +133 -134
- data/lib/tomosia_icons8_crawl/version.rb +1 -1
- data/tomosia_icons8_crawl.gemspec +5 -5
- metadata +34 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5f6802e8fdd25f61ecf4b497124924269686d84e0f76f18aa41f5f674fdbd85
|
4
|
+
data.tar.gz: 427bc5bdb482796ce2823d254510f29f5b8deffbd77529cd5b982777edcec0f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4fc7f937873badc4050ffe408256a23286576ba59bb6eceb506d21b6b9b1ba9cf53cb7f48103bdb039288aabd86639ebb26bfb20b0e83fac23da4ce0366a8d19
|
7
|
+
data.tar.gz: 8fa04d08d93b104c729cb5d9149ed7529e3e47870477ddefc00e98025489dba332050b6a92116ec8d83c4557d584da552918c2683187cb27a511328b85cd101a
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -14,49 +14,49 @@ gem 'tomosia_icons8_crawl'
|
|
14
14
|
|
15
15
|
And then execute:
|
16
16
|
|
17
|
-
|
17
|
+
$ bundle install
|
18
18
|
|
19
19
|
Or install it yourself as:
|
20
20
|
|
21
|
-
|
21
|
+
$ gem install tomosia_icons8_crawl
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
25
|
### Commandline or Terminal:
|
26
26
|
|
27
|
-
|
27
|
+
Download every image (across all result pages) from icons8.com that matches your search keyword
|
28
28
|
|
29
|
-
|
29
|
+
$ tomosia_icons8_crawl crawl "keyword" --destination="storage path"
|
30
30
|
|
31
|
-
|
31
|
+
Download a limited number of images from icons8.com that match your search keyword
|
32
32
|
|
33
|
-
|
33
|
+
$ tomosia_icons8_crawl crawl "keyword" --destination="storage path" --max=number
|
34
34
|
|
35
35
|
### Developers:
|
36
36
|
|
37
|
-
|
37
|
+
Gemfile:
|
38
38
|
|
39
|
-
|
39
|
+
$ gem 'tomosia_icons8_crawl'
|
40
40
|
|
41
|
-
|
41
|
+
Require:
|
42
42
|
|
43
|
-
|
43
|
+
$ require 'tomosia_icons8_crawl'
|
44
44
|
|
45
|
-
|
45
|
+
Download every image (across all result pages) from icons8.com that matches your search keyword
|
46
46
|
|
47
|
-
|
47
|
+
$ TomosiaIcons8Crawl::CrawlIcons8.crawl("keyword", "destination")
|
48
48
|
|
49
|
-
|
49
|
+
Download a limited number of images from icons8.com that match your search keyword
|
50
50
|
|
51
|
-
|
51
|
+
$ TomosiaIcons8Crawl::CrawlIcons8.crawl("keyword", "destination", max)
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
53
|
+
Help:
|
54
|
+
- keyword is the word used for searching.
|
55
|
+
ex: corona, car, virus,...
|
56
|
+
- destination is the path of the directory where the images will be saved.
|
57
|
+
ex: E:\download, C:\download, C:\Desktop,...
|
58
|
+
- max is the number of images you want to download.
|
59
|
+
ex: 100, 10, 5, 1000,...
|
60
60
|
|
61
61
|
|
62
62
|
|
data/lib/tomosia_icons8_crawl.rb
CHANGED
@@ -1,152 +1,151 @@
|
|
1
1
|
module TomosiaIcons8Crawl
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
2
|
+
require 'open-uri'
|
3
|
+
require "httparty"
|
4
|
+
require 'pry'
|
5
|
+
require 'writeexcel'
|
6
|
+
class CrawlIcons8
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
8
|
+
# get data from website
|
9
|
+
# Query the Icons8 search API and cache the parsed response in @responses.
#
# key - search term. When nil, "No data!" is printed and no request is made.
# max - number of icons to request. When nil, a one-icon probe request is
#       sent first and the 'countAll' value it reports is requested instead.
#
# Returns the parsed response (also stored in @responses), or nil when key
# is nil.
def self.json(key, max)
  if key.nil?
    p "No data!"
    # Stop here: falling through would issue a request with no URL, and a
    # stale @responses from an earlier search must not be reused.
    return @responses = nil
  end

  # Escape the term so keywords containing spaces, '&' or '#' remain a
  # single query value.
  term = URI.encode_www_form_component(key.to_s)
  base = "https://search.icons8.com/api/iconsets/v5/search?term=#{term}"

  amount = max
  if amount.nil?
    probe = HTTParty.get("#{base}&amount=1").parsed_response
    amount = probe['parameters']['countAll']
  end

  @responses = HTTParty.get("#{base}&amount=#{amount}").parsed_response
end
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
27
|
+
# save file to txt
|
28
|
+
# Append one line describing an image to a plain-text log file.
#
# index     - value printed before the entry (e.g. the image's position).
# name      - image name.
# url       - image URL.
# size      - image size; written followed by the literal suffix "Kb".
# extension - file extension.
# log_path  - file to append to; defaults to "log_image.txt" in the current
#             working directory, which was previously hard-coded.
#
# Returns the number of bytes written.
def self.save_file_txt(index, name, url, size, extension, log_path: "log_image.txt")
  # Append-only mode is enough: the log is never read back here.
  File.open(log_path, "a") do |log|
    log.write("#{index}. name: #{name} | url: #{url} | size: #{size}Kb | extension: #{extension} \n")
  end
end
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
34
|
+
# save file to excel
|
35
|
+
# Write the download report to "<path>/export.xls".
#
# path - directory in which export.xls is created.
# data - enumerable of row hashes with the string keys "index", "name",
#        "url", "size" and "extension".
#
# The first sheet row is a bold, centred header; each entry of data follows
# on its own row in the order given. Any StandardError is reported on stdout
# ("Can't saved file" plus the exception) instead of being raised.
def self.save_file_excel(path, data = {})
  workbook = WriteExcel.new(File.join(path, 'export.xls'))

  header = workbook.add_format
  header.set_bold
  header.set_align('center')

  cell = workbook.add_format
  cell.set_align('center')

  link = workbook.add_format
  link.set_color('blue')
  link.set_align('center')

  sheet = workbook.add_worksheet
  ['STT', 'NAME', 'URL', 'SIZE(byte)', 'EXTENSION'].each_with_index do |title, col|
    sheet.write_string(0, col, title, header)
  end

  # Rows are written sequentially and exactly once each: writing cells is
  # in-memory work, so one thread per row (and re-writing the same cells once
  # per hash key) only added overhead and concurrent mutation of the sheet.
  data.each_with_index do |row, index|
    line = index + 1
    # to_s keeps write_string happy with Integer indexes and with a nil size
    # left behind by a failed download.
    sheet.write_string(line, 0, row['index'].to_s, cell)
    sheet.write_string(line, 1, row['name'].to_s, cell)
    sheet.write_url(line, 2, row['url'], link)
    sheet.write_string(line, 3, row['size'].to_s, cell)
    sheet.write_string(line, 4, row['extension'].to_s, cell)
  end

  workbook.close
rescue StandardError => e
  # StandardError rather than Exception, so Ctrl-C and exit still work.
  p "Can't saved file"
  p e
end
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
84
|
+
# download image
|
85
|
+
# Download one image and store it at path.
#
# path - destination file; opened in binary write mode and overwritten.
# img  - location of the image, opened through open-uri.
#
# A failed attempt is retried up to five times, printing
# "Retry download image" before each retry.
#
# Returns the size reported by the opened source (also stored in @size), or
# nil when every attempt failed.
def self.download_image(path, img)
  attempts = 0
  begin
    # URI.open instead of Kernel#open: since Ruby 3.0 open-uri no longer
    # teaches Kernel#open to fetch URLs.
    URI.open(img) do |remote|
      File.open(path, 'wb') { |file| file.write(remote.read) }
      @size = remote.size
    end
  rescue StandardError
    return nil if attempts >= 5

    attempts += 1
    p "Retry download image"
    retry
  end
end
|
103
103
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
104
|
+
# multi download image
|
105
|
+
# Download every image concurrently and collect one report row per image.
#
# path - destination directory; each file is saved there as
#        "<index>-<name>.png".
# imgs - array of image URLs.
#
# Fills @data with one hash per image, in the same order as imgs, using the
# string keys "index", "name", "url", "size" and "extension". Any
# StandardError is reported on stdout ("no data" plus the exception) instead
# of being raised.
#
# Returns @data on success.
def self.multi_download_image(path, imgs)
  @data = []

  workers = imgs.each_with_index.map do |img, index|
    title = File.basename(img, '.png')
    target = File.join(path, "#{index}-#{title}.png")
    ext = File.extname(img).delete('.')

    Thread.new do
      # Use the value download_image returns: @size is shared by all
      # threads and may already hold another image's size by the time this
      # thread reads it.
      size = download_image(target, img)
      { "index" => index, "name" => title, "url" => img, "size" => size, "extension" => ext }
    end
  end

  # Thread#value joins each worker and yields its row, so rows stay in
  # input order rather than completion order.
  @data = workers.map(&:value)
rescue StandardError => e
  # StandardError rather than Exception, so Ctrl-C and exit still work.
  p "no data"
  p e
end
|
127
127
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
@responses['icons'].each_with_index do |item, index|
|
128
|
+
# main
|
129
|
+
# Entry point: search Icons8 for keyword, download the matching icons into
# path and write an Excel report next to them.
#
# keyword - search term passed to json.
# path    - destination directory for the images and the report
#           (defaults to the current directory).
# max     - number of icons to fetch; nil is passed through to json.
#
# Any StandardError is reported on stdout ("--Runtime error--" plus the
# exception) instead of being raised.
def self.crawl(keyword = nil, path = ".", max = nil)
  json(keyword, max)

  # Build the 2x PNG address of every icon in the search response.
  images = @responses['icons'].map do |item|
    "https://img.icons8.com/#{item['platform']}/2x/#{item['commonName']}.png"
  end

  multi_download_image(path, images)
  save_file_excel(path, @data)
rescue StandardError => e
  # StandardError rather than Exception, so Ctrl-C and exit still work.
  p "--Runtime error--"
  p e
end
|
149
|
+
end
|
151
150
|
end
|
152
151
|
# TomosiaIcons8Crawl::CrawlIcons8.crawl("dog", "E:\img")
|
@@ -13,12 +13,12 @@ Gem::Specification.new do |spec|
|
|
13
13
|
|
14
14
|
spec.add_development_dependency "bundler"
|
15
15
|
spec.add_development_dependency "rake"
|
16
|
-
spec.add_runtime_dependency "thor"
|
16
|
+
spec.add_runtime_dependency "thor", '~> 1.0', '>= 1.0.1'
|
17
17
|
spec.add_development_dependency "rspec"
|
18
|
-
spec.add_runtime_dependency "httparty"
|
19
|
-
spec.add_runtime_dependency "writeexcel"
|
20
|
-
spec.add_runtime_dependency "nokogiri"
|
21
|
-
spec.add_runtime_dependency "pry"
|
18
|
+
spec.add_runtime_dependency "httparty", '~> 0.17.3'
|
19
|
+
spec.add_runtime_dependency "writeexcel", '~> 1.0', '>= 1.0.5'
|
20
|
+
spec.add_runtime_dependency "nokogiri", '~> 1.10', '>= 1.10.10'
|
21
|
+
spec.add_runtime_dependency "pry", '~> 0.13.1'
|
22
22
|
|
23
23
|
# Specify which files should be added to the gem when it is released.
|
24
24
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomosia_icons8_crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ninh Tomosia
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -42,16 +42,22 @@ dependencies:
|
|
42
42
|
name: thor
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
45
48
|
- - ">="
|
46
49
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
50
|
+
version: 1.0.1
|
48
51
|
type: :runtime
|
49
52
|
prerelease: false
|
50
53
|
version_requirements: !ruby/object:Gem::Requirement
|
51
54
|
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '1.0'
|
52
58
|
- - ">="
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
60
|
+
version: 1.0.1
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: rspec
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,58 +76,70 @@ dependencies:
|
|
70
76
|
name: httparty
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
72
78
|
requirements:
|
73
|
-
- - "
|
79
|
+
- - "~>"
|
74
80
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
81
|
+
version: 0.17.3
|
76
82
|
type: :runtime
|
77
83
|
prerelease: false
|
78
84
|
version_requirements: !ruby/object:Gem::Requirement
|
79
85
|
requirements:
|
80
|
-
- - "
|
86
|
+
- - "~>"
|
81
87
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
88
|
+
version: 0.17.3
|
83
89
|
- !ruby/object:Gem::Dependency
|
84
90
|
name: writeexcel
|
85
91
|
requirement: !ruby/object:Gem::Requirement
|
86
92
|
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '1.0'
|
87
96
|
- - ">="
|
88
97
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
98
|
+
version: 1.0.5
|
90
99
|
type: :runtime
|
91
100
|
prerelease: false
|
92
101
|
version_requirements: !ruby/object:Gem::Requirement
|
93
102
|
requirements:
|
103
|
+
- - "~>"
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '1.0'
|
94
106
|
- - ">="
|
95
107
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
108
|
+
version: 1.0.5
|
97
109
|
- !ruby/object:Gem::Dependency
|
98
110
|
name: nokogiri
|
99
111
|
requirement: !ruby/object:Gem::Requirement
|
100
112
|
requirements:
|
113
|
+
- - "~>"
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '1.10'
|
101
116
|
- - ">="
|
102
117
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
118
|
+
version: 1.10.10
|
104
119
|
type: :runtime
|
105
120
|
prerelease: false
|
106
121
|
version_requirements: !ruby/object:Gem::Requirement
|
107
122
|
requirements:
|
123
|
+
- - "~>"
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '1.10'
|
108
126
|
- - ">="
|
109
127
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
128
|
+
version: 1.10.10
|
111
129
|
- !ruby/object:Gem::Dependency
|
112
130
|
name: pry
|
113
131
|
requirement: !ruby/object:Gem::Requirement
|
114
132
|
requirements:
|
115
|
-
- - "
|
133
|
+
- - "~>"
|
116
134
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
135
|
+
version: 0.13.1
|
118
136
|
type: :runtime
|
119
137
|
prerelease: false
|
120
138
|
version_requirements: !ruby/object:Gem::Requirement
|
121
139
|
requirements:
|
122
|
-
- - "
|
140
|
+
- - "~>"
|
123
141
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
142
|
+
version: 0.13.1
|
125
143
|
description:
|
126
144
|
email:
|
127
145
|
- tt.ninh.le@tomosia.com
|