tomosia_wallhere_crawl 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b52398c94e10a3e7d0b5efaacf9f51e5c77565d0c053017221abca624d025932
4
- data.tar.gz: e8568e2f02a76cea021e43512019b66a554d952819aea064229b02bf3ab2d93f
3
+ metadata.gz: a6c61da5b1321f6aedfbcd3ba4dba8e5fb101ec4f36ca4e6c974e1f26ab01cf7
4
+ data.tar.gz: 8b3d0be435727d25a4773a02b57d8f063bba8bb7b1a550724306dedf3003de39
5
5
  SHA512:
6
- metadata.gz: 336d1c213416c431882c12cdae1296860720466d1e77ca5febe0cc86f182910c37f4a0aa282fdd125ab051b68ffb688e659f62ae42e0988ba36501953d283234
7
- data.tar.gz: ea2ffbc8c6760cc615184b7978e1066bb15f2b276bb0935af1db7dfaf68cfaac977818afbb3e09f24f832fdaabb38aa46bb23daf3c4836c1e17209b75d2820ae
6
+ metadata.gz: 6a5064bca8a988c533b110ed2b31c6f7b264030dfd4fd7ecc1766dec801096bc85c9bf374157b21bd08f2b9fee1ed9743ed7aeea845e8195b85ee30490d3280f
7
+ data.tar.gz: b07d631fa4d6c33806e91641688cda81f872ebdfa9a77fc7af830eed579aaf9e1315bdf7ce623680263fa531e94bdac0f7ba0af67597880b0e92363f5bd3cbd0
data/Gemfile CHANGED
@@ -4,3 +4,6 @@ source "https://rubygems.org"
4
4
  gemspec
5
5
 
6
6
  gem "rake", "~> 12.0"
7
+ gem 'nokogiri', '~> 1.10', '>= 1.10.10'
8
+ gem "rspec", "~> 3.0"
9
+ gem 'writeexcel', '~> 1.0', '>= 1.0.5'
@@ -0,0 +1,45 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ tomosia_wallhere_crawl (0.1.1)
5
+ nokogiri
6
+ thor
7
+ writeexcel
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ diff-lcs (1.4.4)
13
+ mini_portile2 (2.4.0)
14
+ nokogiri (1.10.10)
15
+ mini_portile2 (~> 2.4.0)
16
+ rake (12.3.3)
17
+ rspec (3.9.0)
18
+ rspec-core (~> 3.9.0)
19
+ rspec-expectations (~> 3.9.0)
20
+ rspec-mocks (~> 3.9.0)
21
+ rspec-core (3.9.2)
22
+ rspec-support (~> 3.9.3)
23
+ rspec-expectations (3.9.2)
24
+ diff-lcs (>= 1.2.0, < 2.0)
25
+ rspec-support (~> 3.9.0)
26
+ rspec-mocks (3.9.1)
27
+ diff-lcs (>= 1.2.0, < 2.0)
28
+ rspec-support (~> 3.9.0)
29
+ rspec-support (3.9.3)
30
+ thor (1.0.1)
31
+ writeexcel (1.0.5)
32
+
33
+ PLATFORMS
34
+ ruby
35
+
36
+ DEPENDENCIES
37
+ bundler
38
+ nokogiri (~> 1.10, >= 1.10.10)
39
+ rake (~> 12.0)
40
+ rspec (~> 3.0)
41
+ tomosia_wallhere_crawl!
42
+ writeexcel (~> 1.0, >= 1.0.5)
43
+
44
+ BUNDLED WITH
45
+ 2.1.4
data/README.md CHANGED
@@ -21,9 +21,17 @@ Or install it yourself as:
21
21
  $ gem install tomosia_wallhere_crawl
22
22
 
23
23
  ## Usage
24
+ _NOTE: "key" is the tag to search for, --destination is the directory where the images are saved, and --max is the maximum number of images to download.
24
25
 
25
- TODO: Write usage instructions here
26
+ _/ Crawl all images in a tag!
27
+ tomosia_wallhere_crawl crawl "key" --destination " "
28
+ => tomosia_wallhere_crawl crawl "aaa" --destination "/home/tung/Desktop/img/"
26
29
 
30
+ _/ Crawl images in a tag, up to a maximum number of images!
31
+ tomosia_wallhere_crawl crawl "key" --destination " " --max
32
+ => tomosia_wallhere_crawl crawl "aaa" --destination "/home/tung/Desktop/img/" --max=100
33
+
34
+ A file (InfoImage.xls) with information about the downloaded images of the tag is saved in the destination directory.
27
35
  ## Development
28
36
 
29
37
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
1
1
  require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
2
4
  task :default => :spec
5
+
6
+ namespace :gem do
7
+ task :build do
8
+ system "rake build && rake install"
9
+ end
10
+ end
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'tomosia_wallhere_crawl/cli'
4
+ TomosiaWallhereCrawl::Cli.start
@@ -1,10 +1,12 @@
1
1
  require 'open-uri'
2
2
  require 'nokogiri'
3
3
  require 'writeexcel'
4
+ require 'thor'
4
5
  module TomosiaWallhereCrawl
5
6
  class CrawlImage
6
- def savedata (data = {}, description)
7
- workbook = WriteExcel.new("#{description}/InfoImage.xls")
7
+
8
+ def savedata (data = {}, destination)
9
+ workbook = WriteExcel.new("#{destination}/InfoImage.xls")
8
10
  worksheet = workbook.add_worksheet
9
11
  data.each_with_index do |row, stt|
10
12
  row.each do |key, value|
@@ -14,13 +16,14 @@ module TomosiaWallhereCrawl
14
16
  worksheet.write_string(stt, 3, row['size'])
15
17
  end
16
18
  end
17
- workbook.close
19
+ workbook.close
20
+ puts "Save successfully"
18
21
  end
19
- def crawldata(key,description,max)
20
- data=[]
21
-
22
+
23
+ def crawldata(key,destination,max=nil)
22
24
  sum = 0
23
- index = 1
25
+ index = 1
26
+ images = []
24
27
  while sum != max do
25
28
  # Open url
26
29
  url = "https://wallhere.com/en/wallpapers?q=#{key}&page=#{index}"
@@ -28,45 +31,62 @@ module TomosiaWallhereCrawl
28
31
  content = document.read
29
32
  parsed_content = Nokogiri::HTML(content)
30
33
  length = parsed_content.css('.item').to_a.length - 1
31
- if length == -1
32
- break
33
- else
34
- i = 0
34
+ total_img = parsed_content.css('div.hub-totalinfo').text.split(' HD Wallpapers')[0].to_i
35
+ if max == nil || max > total_img
36
+ max = total_img
37
+ puts "This tag has #{total_img} pictures"
38
+ end
39
+ i = 0
35
40
  for i in i..length
36
- nameimg = File.basename(parsed_content.css('.item').to_a[i].children.children.first.to_h['src']).delete('.jpg!s')
37
- # Get url img
38
- urlimg = parsed_content.css('.item').to_a[i].children.children.first.to_h['src']
39
- open(urlimg) do |image|
40
- File.open("#{description}#{nameimg}", "w+") do |file|
41
- file.write(image.read)
42
- #Get info img
43
- n = nameimg
44
- ui = parsed_content.css('.item').to_a[i].children.children.first.to_h['src']
45
- ex = File.extname(parsed_content.css('.item').to_a[i].children.children.first.to_h['src']).delete('.!s')
46
- size = File.size("#{description}#{nameimg}")
47
- s = "#{size} kb"
48
- row = {'stt'=>i, 'name'=>n, 'url'=>ui, 'extension'=>ex, 'size'=>s}
49
- data.push(row)
50
- end
51
- end
52
- sum += 1
53
- if max == sum
54
- break
55
- end
56
- end
41
+ urlimg = parsed_content.css('.item').to_a[i].children.children.first.to_h['src']
42
+ images.push(urlimg)
43
+
44
+ print '.'
45
+ sum += 1
46
+ if max == sum
47
+ break
48
+ end
49
+ end
50
+ index += 1
57
51
  end
58
- index += 1
52
+ download(images,destination)
53
+ end
54
+
55
+ def download(images,destination)
56
+ data = []
57
+ row = {}
58
+ thread = []
59
+ images.each do |img|
60
+ thread << Thread.new(img) do
61
+ timeout = 0
62
+ begin
63
+ open(img) do |image|
64
+ nameimg = File.basename(img,".jpg!s")
65
+ ui = img
66
+ ex = File.extname(img).delete('.!s')
67
+ size = ""
68
+ File.open("#{destination}#{nameimg}","wb") do |file|
69
+ file.write(image.read)
70
+ size = image.size
71
+ end
72
+ size = size.to_s + " bytes"
73
+ row = {"name"=>nameimg, "url"=>ui, "extension"=> ex, "size" => size}
74
+ data.push(row)
75
+ end
76
+ rescue => exception
77
+ if timeout < 3
78
+ timeout += 1
79
+ retry
80
+ else
81
+ next
82
+ end
83
+ end
59
84
  end
60
- savedata(data,description)
61
- end
62
- def crawl(key,description,max = nil)
63
- mutithread = (0..2).map do
64
- Thread.new do
65
- crawldata(key,description,max)
66
- end
67
- end
68
- mutithread.map(&:join)
69
- end
85
+ end
86
+ thread.each {|t| t.join}
87
+ puts " "
88
+ puts "Download successfully"
89
+ savedata(data,destination)
70
90
  end
71
91
  end
72
-
92
+ end
@@ -0,0 +1,17 @@
1
+ require 'thor'
2
+ require_relative '../tomosia_wallhere_crawl'
3
+
4
+ module TomosiaWallhereCrawl
5
+ class Cli < Thor
6
+ desc "crawl KEYWORD", "enter KEYWORD to search"
7
+ option :destination
8
+ option :max
9
+ def crawl(keyword)
10
+ if options[:max] == nil
11
+ TomosiaWallhereCrawl::CrawlImage.new.crawldata(keyword, options[:destination], options[:max])
12
+ else
13
+ TomosiaWallhereCrawl::CrawlImage.new.crawldata(keyword, options[:destination], options[:max].to_i)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,3 @@
1
1
  module TomosiaWallhereCrawl
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -11,12 +11,17 @@ Gem::Specification.new do |spec|
11
11
  spec.homepage = "https://github.com/nguyensontung183183/tomosia_wallhere_crawl.git"
12
12
  spec.license = "MIT"
13
13
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
- # Specify which files should be added to the gem when it is released.
15
- # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
16
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
17
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
- end
14
+
15
+
16
+ spec.files = `git ls-files`.split("\n")
19
17
  spec.bindir = "exe"
20
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
18
+ spec.executables = ["tomosia_wallhere_crawl"]
21
19
  spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency 'thor'
22
+ spec.add_runtime_dependency 'nokogiri'
23
+ spec.add_runtime_dependency 'writeexcel'
24
+ spec.add_development_dependency "bundler"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "rspec"
22
27
  end
metadata CHANGED
@@ -1,31 +1,121 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomosia_wallhere_crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - nguyen son tung
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-05 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2020-08-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: thor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: writeexcel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
13
97
  description: Write a longer description or delete this line.
14
98
  email:
15
99
  - nguyensontung18183
16
- executables: []
100
+ executables:
101
+ - tomosia_wallhere_crawl
17
102
  extensions: []
18
103
  extra_rdoc_files: []
19
104
  files:
20
105
  - CODE_OF_CONDUCT.md
21
106
  - Gemfile
107
+ - Gemfile.lock
22
108
  - LICENSE.txt
23
109
  - README.md
24
110
  - Rakefile
25
111
  - bin/console
26
112
  - bin/setup
113
+ - exe/tomosia_wallhere_crawl
27
114
  - lib/tomosia_wallhere_crawl.rb
115
+ - lib/tomosia_wallhere_crawl/cli.rb
28
116
  - lib/tomosia_wallhere_crawl/version.rb
117
+ - pkg/tomosia_wallhere_crawl-0.1.1.gem
118
+ - tomosia_wallhere_crawl-0.1.0.gem
29
119
  - tomosia_wallhere_crawl.gemspec
30
120
  homepage: https://github.com/nguyensontung183183/tomosia_wallhere_crawl.git
31
121
  licenses: