tomosia_icons8_crawl 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -1
- data/Gemfile.lock +1 -1
- data/README.md +21 -21
- data/lib/tomosia_icons8_crawl.rb +133 -134
- data/lib/tomosia_icons8_crawl/version.rb +1 -1
- data/tomosia_icons8_crawl.gemspec +5 -5
- metadata +34 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5f6802e8fdd25f61ecf4b497124924269686d84e0f76f18aa41f5f674fdbd85
|
4
|
+
data.tar.gz: 427bc5bdb482796ce2823d254510f29f5b8deffbd77529cd5b982777edcec0f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4fc7f937873badc4050ffe408256a23286576ba59bb6eceb506d21b6b9b1ba9cf53cb7f48103bdb039288aabd86639ebb26bfb20b0e83fac23da4ce0366a8d19
|
7
|
+
data.tar.gz: 8fa04d08d93b104c729cb5d9149ed7529e3e47870477ddefc00e98025489dba332050b6a92116ec8d83c4557d584da552918c2683187cb27a511328b85cd101a
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -14,49 +14,49 @@ gem 'tomosia_icons8_crawl'
|
|
14
14
|
|
15
15
|
And then execute:
|
16
16
|
|
17
|
-
|
17
|
+
$ bundle install
|
18
18
|
|
19
19
|
Or install it yourself as:
|
20
20
|
|
21
|
-
|
21
|
+
$ gem install tomosia_icons8_crawl
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
25
|
### Commandline or Terminal:
|
26
26
|
|
27
|
-
|
27
|
+
Download every image, across all result pages, from icons8.com that matches your search keyword
|
28
28
|
|
29
|
-
|
29
|
+
$ tomosia_icons8_crawl crawl "keyword" --destination="storage path"
|
30
30
|
|
31
|
-
|
31
|
+
Download up to the given number of images from icons8.com that match your search keyword
|
32
32
|
|
33
|
-
|
33
|
+
$ tomosia_icons8_crawl crawl "keyword" --destination="storage path" --max=number
|
34
34
|
|
35
35
|
### Developers:
|
36
36
|
|
37
|
-
|
37
|
+
Gemfile:
|
38
38
|
|
39
|
-
|
39
|
+
$ gem 'tomosia_icons8_crawl'
|
40
40
|
|
41
|
-
|
41
|
+
Require:
|
42
42
|
|
43
|
-
|
43
|
+
$ require 'tomosia_icons8_crawl'
|
44
44
|
|
45
|
-
|
45
|
+
Download every image, across all result pages, from icons8.com that matches your search keyword
|
46
46
|
|
47
|
-
|
47
|
+
$ TomosiaIcons8Crawl::CrawlIcons8.crawl("keyword", "destination")
|
48
48
|
|
49
|
-
|
49
|
+
Download up to the given number of images from icons8.com that match your search keyword
|
50
50
|
|
51
|
-
|
51
|
+
$ TomosiaIcons8Crawl::CrawlIcons8.crawl("keyword", "destination", max)
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
53
|
+
Help:
|
54
|
+
- keyword is the word used for searching.
|
55
|
+
ex: corona, car, virus,...
|
56
|
+
- destination is the path of the directory where the image was downloaded.
|
57
|
+
ex: E:\download, C:\download, C:\Desktop,...
|
58
|
+
- max is the number of images you want to download.
|
59
|
+
ex: 100, 10, 5, 1000,...
|
60
60
|
|
61
61
|
|
62
62
|
|
data/lib/tomosia_icons8_crawl.rb
CHANGED
@@ -1,152 +1,151 @@
|
|
1
1
|
module TomosiaIcons8Crawl
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
2
|
+
require 'open-uri'
|
3
|
+
require "httparty"
|
4
|
+
require 'pry'
|
5
|
+
require 'writeexcel'
|
6
|
+
class CrawlIcons8
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
8
|
+
# get data from website
|
9
|
+
# Queries the Icons8 search API for +key+ and stores the parsed response in
# @responses (which is also the return value).
#
# key - search term. When nil, "No data!" is printed, @responses is cleared
#       and nil is returned without issuing any request.
# max - number of icons to request. When nil, a first request with amount=1
#       is made to read parameters.countAll, and that total is requested.
def self.json(key, max)
  if key.nil?
    p "No data!"
    # Clear any previous result so a caller cannot mistake a stale search
    # for the outcome of this one.
    return @responses = nil
  end

  # Encode the term so spaces and reserved characters cannot break the query.
  base = "https://search.icons8.com/api/iconsets/v5/search?term=#{URI.encode_www_form_component(key)}"

  amount = max
  if amount.nil?
    probe = HTTParty.get("#{base}&amount=1").parsed_response
    amount = probe['parameters']['countAll']
  end

  @responses = HTTParty.get("#{base}&amount=#{amount}").parsed_response
end
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
27
|
+
# save file to txt
|
28
|
+
# Appends one line describing a downloaded image to a plain-text log.
#
# index     - sequence number written at the start of the line.
# name      - image name.
# url       - source URL of the image.
# size      - image size; written followed by the literal "Kb".
# extension - file extension.
# log_path  - file to append to; defaults to "log_image.txt" in the current
#             working directory, which is what the method always used before
#             this parameter existed.
def self.save_file_txt(index, name, url, size, extension, log_path = "log_image.txt")
  File.open(log_path, "a+") do |log|
    log.write("#{index}. name: #{name} | url: #{url} | size: #{size}Kb | extension: #{extension} \n")
  end
end
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
34
|
+
# save file to excel
|
35
|
+
# Writes the image report to "<path>/export.xls".
#
# path - directory that receives export.xls.
# data - collection of row hashes with the string keys "index", "name",
#        "url", "size" and "extension". One spreadsheet row is written per
#        entry, below a bold, centred header row.
#
# Any StandardError raised while building the workbook is printed with `p`
# rather than propagated to the caller.
def self.save_file_excel( path, data = {})
  workbook = WriteExcel.new(File.join(path, 'export.xls'))

  header_format = workbook.add_format
  header_format.set_bold()
  header_format.set_align('center')

  cell_format = workbook.add_format
  cell_format.set_align('center')

  url_format = workbook.add_format
  url_format.set_color('blue')
  url_format.set_align('center')

  worksheet = workbook.add_worksheet
  ['STT', 'NAME', 'URL', 'SIZE(byte)', 'EXTENSION'].each_with_index do |title, col|
    worksheet.write_string(0, col, title, header_format)
  end

  # Each row is written exactly once and sequentially: the writes are
  # in-memory, so a thread per row only adds contention on the shared
  # worksheet, and looping over the row's keys would repeat identical writes.
  data.each_with_index do |row, index|
    line = index + 1 # row 0 holds the header
    worksheet.write_string(line, 0, row['index'], cell_format)
    worksheet.write_string(line, 1, row['name'], cell_format)
    worksheet.write_url(line, 2, row['url'], url_format)
    worksheet.write_string(line, 3, row['size'], cell_format)
    worksheet.write_string(line, 4, row['extension'], cell_format)
  end

  workbook.close
rescue StandardError => e
  # StandardError only: Interrupt and SystemExit must still stop the program.
  p "Can't saved file"
  p e
end
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
84
|
+
# download image
|
85
|
+
# Downloads +img+ and saves it to +path+ in binary mode.
#
# path - destination file path.
# img  - location of the image, opened through open-uri.
#
# Returns the number of bytes saved (also stored in @size), or nil when the
# download still fails after the initial attempt plus five retries. @size is
# left untouched on failure.
def self.download_image(path, img)
  attempts = 0
  begin
    # URI.open is the open-uri entry point; Kernel#open stopped accepting
    # URLs in Ruby 3.0.
    URI.open(img) do |image|
      # Read the whole body before creating the destination so a failed
      # transfer does not leave an empty file behind.
      content = image.read
      File.open(path, 'wb') { |file| file.write(content) }
      @size = content.bytesize
    end
  rescue StandardError
    if attempts < 5
      attempts += 1
      p "Retry download image"
      retry
    end
    nil
  end
end
|
103
103
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
104
|
+
# multi download image
|
105
|
+
# Downloads every image in +imgs+ concurrently (one thread per image) into
# +path+ and records one report row per image in @data.
#
# path - destination directory; files are named "<index>-<basename>.png".
# imgs - list of image URLs.
#
# Each row is a hash with the keys "index", "name", "url", "size" and
# "extension". Rows are stored in the same order as +imgs+. Any
# StandardError is printed with `p` instead of being raised.
def self.multi_download_image(path, imgs)
  @data = []

  threads = imgs.each_with_index.map do |img, index|
    title = File.basename(img, '.png')
    des = File.join(path, "#{index}-#{title}.png")
    ext = File.extname(img).delete('.')

    Thread.new do
      # Use download_image's return value (the saved byte count, nil on
      # failure) rather than the shared @size, which another thread may
      # overwrite before this one reads it.
      size = download_image(des, img)
      {"index" => index, "name" => title, "url" => img, "size" => size, "extension" => ext}
    end
  end

  # Thread#value joins each thread and yields its row, preserving input order.
  @data = threads.map(&:value)
  threads # same return value as before: the list of joined threads
rescue StandardError => e
  p "no data"
  p e
end
|
127
127
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
@responses['icons'].each_with_index do |item, index|
|
128
|
+
# main
|
129
|
+
# Entry point: searches Icons8 for +keyword+, downloads the matching icons
# into +path+ and writes the Excel report there.
#
# keyword - search term passed to json.
# path    - destination directory for the images and the report
#           (defaults to the current directory).
# max     - number of icons to fetch; nil is passed through to json.
#
# Any StandardError raised along the way is printed with `p` after the
# "--Runtime error--" marker instead of being raised to the caller.
def self.crawl(keyword = nil, path = ".", max = nil)
  json(keyword, max)

  # Build the 2x PNG address of every icon in the search result.
  images = @responses['icons'].map do |item|
    "https://img.icons8.com/#{item['platform']}/2x/#{item['commonName']}.png"
  end

  multi_download_image(path, images)
  save_file_excel(path, @data)
rescue StandardError => e
  # StandardError only: Ctrl-C (Interrupt) and exit must not be swallowed.
  p "--Runtime error--"
  p e
end
|
149
|
+
end
|
151
150
|
end
|
152
151
|
# TomosiaIcons8Crawl::CrawlIcons8.crawl("dog", "E:\img")
|
@@ -13,12 +13,12 @@ Gem::Specification.new do |spec|
|
|
13
13
|
|
14
14
|
spec.add_development_dependency "bundler"
|
15
15
|
spec.add_development_dependency "rake"
|
16
|
-
spec.add_runtime_dependency "thor"
|
16
|
+
spec.add_runtime_dependency "thor", '~> 1.0', '>= 1.0.1'
|
17
17
|
spec.add_development_dependency "rspec"
|
18
|
-
spec.add_runtime_dependency "httparty"
|
19
|
-
spec.add_runtime_dependency "writeexcel"
|
20
|
-
spec.add_runtime_dependency "nokogiri"
|
21
|
-
spec.add_runtime_dependency "pry"
|
18
|
+
spec.add_runtime_dependency "httparty", '~> 0.17.3'
|
19
|
+
spec.add_runtime_dependency "writeexcel", '~> 1.0', '>= 1.0.5'
|
20
|
+
spec.add_runtime_dependency "nokogiri", '~> 1.10', '>= 1.10.10'
|
21
|
+
spec.add_runtime_dependency "pry", '~> 0.13.1'
|
22
22
|
|
23
23
|
# Specify which files should be added to the gem when it is released.
|
24
24
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomosia_icons8_crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ninh Tomosia
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -42,16 +42,22 @@ dependencies:
|
|
42
42
|
name: thor
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
45
48
|
- - ">="
|
46
49
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
50
|
+
version: 1.0.1
|
48
51
|
type: :runtime
|
49
52
|
prerelease: false
|
50
53
|
version_requirements: !ruby/object:Gem::Requirement
|
51
54
|
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '1.0'
|
52
58
|
- - ">="
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
60
|
+
version: 1.0.1
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: rspec
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,58 +76,70 @@ dependencies:
|
|
70
76
|
name: httparty
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
72
78
|
requirements:
|
73
|
-
- - "
|
79
|
+
- - "~>"
|
74
80
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
81
|
+
version: 0.17.3
|
76
82
|
type: :runtime
|
77
83
|
prerelease: false
|
78
84
|
version_requirements: !ruby/object:Gem::Requirement
|
79
85
|
requirements:
|
80
|
-
- - "
|
86
|
+
- - "~>"
|
81
87
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
88
|
+
version: 0.17.3
|
83
89
|
- !ruby/object:Gem::Dependency
|
84
90
|
name: writeexcel
|
85
91
|
requirement: !ruby/object:Gem::Requirement
|
86
92
|
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '1.0'
|
87
96
|
- - ">="
|
88
97
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
98
|
+
version: 1.0.5
|
90
99
|
type: :runtime
|
91
100
|
prerelease: false
|
92
101
|
version_requirements: !ruby/object:Gem::Requirement
|
93
102
|
requirements:
|
103
|
+
- - "~>"
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '1.0'
|
94
106
|
- - ">="
|
95
107
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
108
|
+
version: 1.0.5
|
97
109
|
- !ruby/object:Gem::Dependency
|
98
110
|
name: nokogiri
|
99
111
|
requirement: !ruby/object:Gem::Requirement
|
100
112
|
requirements:
|
113
|
+
- - "~>"
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '1.10'
|
101
116
|
- - ">="
|
102
117
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
118
|
+
version: 1.10.10
|
104
119
|
type: :runtime
|
105
120
|
prerelease: false
|
106
121
|
version_requirements: !ruby/object:Gem::Requirement
|
107
122
|
requirements:
|
123
|
+
- - "~>"
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '1.10'
|
108
126
|
- - ">="
|
109
127
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
128
|
+
version: 1.10.10
|
111
129
|
- !ruby/object:Gem::Dependency
|
112
130
|
name: pry
|
113
131
|
requirement: !ruby/object:Gem::Requirement
|
114
132
|
requirements:
|
115
|
-
- - "
|
133
|
+
- - "~>"
|
116
134
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
135
|
+
version: 0.13.1
|
118
136
|
type: :runtime
|
119
137
|
prerelease: false
|
120
138
|
version_requirements: !ruby/object:Gem::Requirement
|
121
139
|
requirements:
|
122
|
-
- - "
|
140
|
+
- - "~>"
|
123
141
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
142
|
+
version: 0.13.1
|
125
143
|
description:
|
126
144
|
email:
|
127
145
|
- tt.ninh.le@tomosia.com
|