tomosia_wallhere_crawl 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b52398c94e10a3e7d0b5efaacf9f51e5c77565d0c053017221abca624d025932
4
- data.tar.gz: e8568e2f02a76cea021e43512019b66a554d952819aea064229b02bf3ab2d93f
3
+ metadata.gz: a6c61da5b1321f6aedfbcd3ba4dba8e5fb101ec4f36ca4e6c974e1f26ab01cf7
4
+ data.tar.gz: 8b3d0be435727d25a4773a02b57d8f063bba8bb7b1a550724306dedf3003de39
5
5
  SHA512:
6
- metadata.gz: 336d1c213416c431882c12cdae1296860720466d1e77ca5febe0cc86f182910c37f4a0aa282fdd125ab051b68ffb688e659f62ae42e0988ba36501953d283234
7
- data.tar.gz: ea2ffbc8c6760cc615184b7978e1066bb15f2b276bb0935af1db7dfaf68cfaac977818afbb3e09f24f832fdaabb38aa46bb23daf3c4836c1e17209b75d2820ae
6
+ metadata.gz: 6a5064bca8a988c533b110ed2b31c6f7b264030dfd4fd7ecc1766dec801096bc85c9bf374157b21bd08f2b9fee1ed9743ed7aeea845e8195b85ee30490d3280f
7
+ data.tar.gz: b07d631fa4d6c33806e91641688cda81f872ebdfa9a77fc7af830eed579aaf9e1315bdf7ce623680263fa531e94bdac0f7ba0af67597880b0e92363f5bd3cbd0
data/Gemfile CHANGED
@@ -4,3 +4,6 @@ source "https://rubygems.org"
4
4
  gemspec
5
5
 
6
6
  gem "rake", "~> 12.0"
7
+ gem 'nokogiri', '~> 1.10', '>= 1.10.10'
8
+ gem "rspec", "~> 3.0"
9
+ gem 'writeexcel', '~> 1.0', '>= 1.0.5'
@@ -0,0 +1,45 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ tomosia_wallhere_crawl (0.1.1)
5
+ nokogiri
6
+ thor
7
+ writeexcel
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ diff-lcs (1.4.4)
13
+ mini_portile2 (2.4.0)
14
+ nokogiri (1.10.10)
15
+ mini_portile2 (~> 2.4.0)
16
+ rake (12.3.3)
17
+ rspec (3.9.0)
18
+ rspec-core (~> 3.9.0)
19
+ rspec-expectations (~> 3.9.0)
20
+ rspec-mocks (~> 3.9.0)
21
+ rspec-core (3.9.2)
22
+ rspec-support (~> 3.9.3)
23
+ rspec-expectations (3.9.2)
24
+ diff-lcs (>= 1.2.0, < 2.0)
25
+ rspec-support (~> 3.9.0)
26
+ rspec-mocks (3.9.1)
27
+ diff-lcs (>= 1.2.0, < 2.0)
28
+ rspec-support (~> 3.9.0)
29
+ rspec-support (3.9.3)
30
+ thor (1.0.1)
31
+ writeexcel (1.0.5)
32
+
33
+ PLATFORMS
34
+ ruby
35
+
36
+ DEPENDENCIES
37
+ bundler
38
+ nokogiri (~> 1.10, >= 1.10.10)
39
+ rake (~> 12.0)
40
+ rspec (~> 3.0)
41
+ tomosia_wallhere_crawl!
42
+ writeexcel (~> 1.0, >= 1.0.5)
43
+
44
+ BUNDLED WITH
45
+ 2.1.4
data/README.md CHANGED
@@ -21,9 +21,17 @@ Or install it yourself as:
21
21
  $ gem install tomosia_wallhere_crawl
22
22
 
23
23
  ## Usage
24
+ _NOTE: key = "tag" , destination: "url saving" , max :number image
24
25
 
25
- TODO: Write usage instructions here
26
+ _/ Crawl all image in tag!
27
+ tomosia_wallhere_crawl crawl "key" --destination " "
28
+ => tomosia_wallhere_crawl crawl "aaa" --destination "/home/tung/Desktop/img/"
26
29
 
30
+ _/ Crawl all images in tag with number of images!
31
+ tomosia_wallhere_crawl crawl "key" --destination " " --max
32
+ => tomosia_wallhere_crawl crawl "aaa" --destination "/home/tung/Desktop/img/" --max=100
33
+
34
+ File save info images of tag in destination
27
35
  ## Development
28
36
 
29
37
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
1
1
  require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
2
4
  task :default => :spec
5
+
6
+ namespace :gem do
7
+ task :build do
8
+ system "rake build && rake install"
9
+ end
10
+ end
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'tomosia_wallhere_crawl/cli'
4
+ TomosiaWallhereCrawl::Cli.start
@@ -1,10 +1,12 @@
1
1
  require 'open-uri'
2
2
  require 'nokogiri'
3
3
  require 'writeexcel'
4
+ require 'thor'
4
5
  module TomosiaWallhereCrawl
5
6
  class CrawlImage
6
- def savedata (data = {}, description)
7
- workbook = WriteExcel.new("#{description}/InfoImage.xls")
7
+
8
+ def savedata (data = {}, destination)
9
+ workbook = WriteExcel.new("#{destination}/InfoImage.xls")
8
10
  worksheet = workbook.add_worksheet
9
11
  data.each_with_index do |row, stt|
10
12
  row.each do |key, value|
@@ -14,13 +16,14 @@ module TomosiaWallhereCrawl
14
16
  worksheet.write_string(stt, 3, row['size'])
15
17
  end
16
18
  end
17
- workbook.close
19
+ workbook.close
20
+ puts "Save successfully"
18
21
  end
19
- def crawldata(key,description,max)
20
- data=[]
21
-
22
+
23
+ def crawldata(key,destination,max=nil)
22
24
  sum = 0
23
- index = 1
25
+ index = 1
26
+ images = []
24
27
  while sum != max do
25
28
  # Open url
26
29
  url = "https://wallhere.com/en/wallpapers?q=#{key}&page=#{index}"
@@ -28,45 +31,62 @@ module TomosiaWallhereCrawl
28
31
  content = document.read
29
32
  parsed_content = Nokogiri::HTML(content)
30
33
  length = parsed_content.css('.item').to_a.length - 1
31
- if length == -1
32
- break
33
- else
34
- i = 0
34
+ total_img = parsed_content.css('div.hub-totalinfo').text.split(' HD Wallpapers')[0].to_i
35
+ if max == nil || max > total_img
36
+ max = total_img
37
+ puts "This tag has #{total_img} pictures"
38
+ end
39
+ i = 0
35
40
  for i in i..length
36
- nameimg = File.basename(parsed_content.css('.item').to_a[i].children.children.first.to_h['src']).delete('.jpg!s')
37
- # Get url img
38
- urlimg = parsed_content.css('.item').to_a[i].children.children.first.to_h['src']
39
- open(urlimg) do |image|
40
- File.open("#{description}#{nameimg}", "w+") do |file|
41
- file.write(image.read)
42
- #Get info img
43
- n = nameimg
44
- ui = parsed_content.css('.item').to_a[i].children.children.first.to_h['src']
45
- ex = File.extname(parsed_content.css('.item').to_a[i].children.children.first.to_h['src']).delete('.!s')
46
- size = File.size("#{description}#{nameimg}")
47
- s = "#{size} kb"
48
- row = {'stt'=>i, 'name'=>n, 'url'=>ui, 'extension'=>ex, 'size'=>s}
49
- data.push(row)
50
- end
51
- end
52
- sum += 1
53
- if max == sum
54
- break
55
- end
56
- end
41
+ urlimg = parsed_content.css('.item').to_a[i].children.children.first.to_h['src']
42
+ images.push(urlimg)
43
+
44
+ print '.'
45
+ sum += 1
46
+ if max == sum
47
+ break
48
+ end
49
+ end
50
+ index += 1
57
51
  end
58
- index += 1
52
+ download(images,destination)
53
+ end
54
+
55
+ def download(images,destination)
56
+ data = []
57
+ row = {}
58
+ thread = []
59
+ images.each do |img|
60
+ thread << Thread.new(img) do
61
+ timeout = 0
62
+ begin
63
+ open(img) do |image|
64
+ nameimg = File.basename(img,".jpg!s")
65
+ ui = img
66
+ ex = File.extname(img).delete('.!s')
67
+ size = ""
68
+ File.open("#{destination}#{nameimg}","wb") do |file|
69
+ file.write(image.read)
70
+ size = image.size
71
+ end
72
+ size = size.to_s + " bytes"
73
+ row = {"name"=>nameimg, "url"=>ui, "extension"=> ex, "size" => size}
74
+ data.push(row)
75
+ end
76
+ rescue => exception
77
+ if timeout < 3
78
+ timeout += 1
79
+ retry
80
+ else
81
+ next
82
+ end
83
+ end
59
84
  end
60
- savedata(data,description)
61
- end
62
- def crawl(key,description,max = nil)
63
- mutithread = (0..2).map do
64
- Thread.new do
65
- crawldata(key,description,max)
66
- end
67
- end
68
- mutithread.map(&:join)
69
- end
85
+ end
86
+ thread.each {|t| t.join}
87
+ puts " "
88
+ puts "Download successfully"
89
+ savedata(data,destination)
70
90
  end
71
91
  end
72
-
92
+ end
@@ -0,0 +1,17 @@
1
+ require 'thor'
2
+ require_relative '../tomosia_wallhere_crawl'
3
+
4
+ module TomosiaWallhereCrawl
5
+ class Cli < Thor
6
+ desc "crawl KEYWORD", "enter KEYWORD to search"
7
+ option :destination
8
+ option :max
9
+ def crawl(keyword)
10
+ if options[:max] == nil
11
+ TomosiaWallhereCrawl::CrawlImage.new.crawldata(keyword, options[:destination], options[:max])
12
+ else
13
+ TomosiaWallhereCrawl::CrawlImage.new.crawldata(keyword, options[:destination], options[:max].to_i)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,3 @@
1
1
  module TomosiaWallhereCrawl
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -11,12 +11,17 @@ Gem::Specification.new do |spec|
11
11
  spec.homepage = "https://github.com/nguyensontung183183/tomosia_wallhere_crawl.git"
12
12
  spec.license = "MIT"
13
13
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
- # Specify which files should be added to the gem when it is released.
15
- # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
16
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
17
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
- end
14
+
15
+
16
+ spec.files = `git ls-files`.split("\n")
19
17
  spec.bindir = "exe"
20
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
18
+ spec.executables = ["tomosia_wallhere_crawl"]
21
19
  spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency 'thor'
22
+ spec.add_runtime_dependency 'nokogiri'
23
+ spec.add_runtime_dependency 'writeexcel'
24
+ spec.add_development_dependency "bundler"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "rspec"
22
27
  end
metadata CHANGED
@@ -1,31 +1,121 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomosia_wallhere_crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - nguyen son tung
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-05 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2020-08-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: thor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: writeexcel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
13
97
  description: Write a longer description or delete this line.
14
98
  email:
15
99
  - nguyensontung18183
16
- executables: []
100
+ executables:
101
+ - tomosia_wallhere_crawl
17
102
  extensions: []
18
103
  extra_rdoc_files: []
19
104
  files:
20
105
  - CODE_OF_CONDUCT.md
21
106
  - Gemfile
107
+ - Gemfile.lock
22
108
  - LICENSE.txt
23
109
  - README.md
24
110
  - Rakefile
25
111
  - bin/console
26
112
  - bin/setup
113
+ - exe/tomosia_wallhere_crawl
27
114
  - lib/tomosia_wallhere_crawl.rb
115
+ - lib/tomosia_wallhere_crawl/cli.rb
28
116
  - lib/tomosia_wallhere_crawl/version.rb
117
+ - pkg/tomosia_wallhere_crawl-0.1.1.gem
118
+ - tomosia_wallhere_crawl-0.1.0.gem
29
119
  - tomosia_wallhere_crawl.gemspec
30
120
  homepage: https://github.com/nguyensontung183183/tomosia_wallhere_crawl.git
31
121
  licenses: