tomosia_amanaplus_crawl 0.1.6 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 652e0f9706c5ac86c8d0bd42cccf006e427d50f4d8a552f3c2ce44d8b4999552
4
- data.tar.gz: 1ffa0934bc0cd16c91ab0c0d75a090e1597698e66708440f36ac96d8ea2411b9
3
+ metadata.gz: 43ec94c9458a2aaf5554bf3eac7728fd8fe4a7e3850a061636ef5bf73569965e
4
+ data.tar.gz: d532f0301d3862807918bd3f1785bf0770302702dfbc4d0d0240f1ac1397a9fa
5
5
  SHA512:
6
- metadata.gz: 7f944b442c13abaa7c010e9b05c64433da2ac68f6bdd36e04ba84048ea5816c7d4de80f7f7da24ea3b2984c222489e61e9104dc49b895231f015f2d7e3e12fde
7
- data.tar.gz: d32bf4d1edcf26f496cb9cece68a160d3c113e063f7a1442a02f8ff917f28937f0574ce050132c3ea92daf4f5490eb533c195d77ff92f18bfee196a8bddd2b25
6
+ metadata.gz: d5f54508d8a41443488ae98f9ba966317ad84a001ce1abbc4ad12e5f8c01a6e939670c5900dfd4467e7a16bb215942c96a44c01103ec906f21d839a42f78337a
7
+ data.tar.gz: 4649b6ebed46e5aaddb54d3e2337e88bc4a98cd0399efbc2b4e49aaeaef088bb38eb29e592d60de6d6c7ad4ab090039558860514946f754de491f39427ae06ed
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tomosia_amanaplus_crawl (0.1.5)
4
+ tomosia_amanaplus_crawl (0.1.8)
5
5
  httparty (= 0.18.1)
6
6
  nokogiri (= 1.10.10)
7
7
  spreadsheet (= 1.2.6)
8
+ thor
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
@@ -37,6 +38,7 @@ GEM
37
38
  ruby-ole (1.2.12.2)
38
39
  spreadsheet (1.2.6)
39
40
  ruby-ole (>= 1.0)
41
+ thor (1.0.1)
40
42
 
41
43
  PLATFORMS
42
44
  ruby
data/README.md CHANGED
@@ -23,9 +23,9 @@ Or install it yourself as:
23
23
  ## Usage
24
24
 
25
25
  ```ruby
26
- require 'tomosia_amanaplus_crawl'
27
- TomosiaAmanaplusCrawl::Crawler.new.run(keyword, path, max)
26
+ tomosia_amanaplus_crawl crawl "keyword" --destination "/home/usr/Documents" --max=123
28
27
  ```
28
+ Example: tomosia_amanaplus_crawl crawl "hoian" --destination "./" --max=123
29
29
  keyword: hoian, danang, ...
30
30
  path: './', '/desktop/', ...
31
31
  max: số lượng ảnh muốn lấy về. Nếu max lớn hơn tổng số ảnh các page thì vẫn lấy hết tất cả ảnh
data/Rakefile CHANGED
@@ -4,3 +4,9 @@ require "rspec/core/rake_task"
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
6
  task :default => :spec
7
+
8
+ namespace :gem do
9
+ task :build do
10
+ system "rake build && rake install"
11
+ end
12
+ end
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'tomosia_amanaplus_crawl/cli'
4
+
5
+ TomosiaAmanaplusCrawl::Cli.start
@@ -2,16 +2,15 @@ require "tomosia_amanaplus_crawl/version"
2
2
 
3
3
  module TomosiaAmanaplusCrawl
4
4
  require 'nokogiri'
5
- require 'httparty'
6
5
  require 'open-uri'
7
6
  require 'fileutils'
8
7
  require 'spreadsheet'
9
8
 
10
9
  class Crawler
11
- URL = "https://plus.amanaimages.com/items/search/"
10
+ URL = "https://plus.amanaimages.com/items/search"
12
11
 
13
12
  def run(keyword, destination, max)
14
- unparsed_page = HTTParty.get("#{URL}/#{keyword}")
13
+ unparsed_page = open("#{URL}/#{keyword}").read
15
14
  parsed_page = Nokogiri::HTML(unparsed_page)
16
15
 
17
16
  pages = parsed_page.css("div.c-paginate__nums").css('a').last.text.to_i # tổng số page
@@ -19,7 +18,7 @@ module TomosiaAmanaplusCrawl
19
18
 
20
19
  # lấy tổng số image
21
20
  total = parsed_page.css("h1.p-search-result__ttl").text.split(' ').first
22
- total = total[11..(total.length - 1)].chop.chop.chop.sub(',', '').to_i
21
+ total = total[(6 + keyword.length)..(total.length - 1)].chop.chop.chop.gsub(',', '').to_i
23
22
  if max > total # nếu max lớn hơn total thì max = total => vẫn lấy hết
24
23
  max = total
25
24
  end
@@ -36,7 +35,7 @@ module TomosiaAmanaplusCrawl
36
35
  while curr_page <= pages
37
36
  puts "Crawling page #{curr_page}..........."
38
37
 
39
- pagination_unparsed_page = HTTParty.get("https://plus.amanaimages.com/items/search/#{keyword}?page=#{curr_page}")
38
+ pagination_unparsed_page = open("#{URL}/#{keyword}?page=#{curr_page}").read
40
39
  pagination_parsed_page = Nokogiri::HTML(pagination_unparsed_page)
41
40
  pagination_images_listings = pagination_parsed_page.css("div.p-item-thumb")
42
41
 
@@ -63,20 +62,30 @@ module TomosiaAmanaplusCrawl
63
62
 
64
63
  # tải hình và cập nhật lại size
65
64
  def downloadImages(images, destination)
66
- path = "#{destination}/Downloads" # lưu hình ở folder Downloads
65
+ path = "#{destination}//Downloads" # lưu hình ở folder Downloads
67
66
  Dir.mkdir path unless File.exist? path
68
67
 
69
68
  threads = []
70
69
  print "\nDownloading"
71
70
  images.each do |curr_image|
72
71
  threads << Thread.new(curr_image) {
73
- open(curr_image[:url]) do |image|
74
- File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
75
- file.write(image.read) # lưu hình ảnh
76
- curr_image[:size] = image.size # cập nhật lại size trong mảng images
77
- print "."
72
+ timeout = 0
73
+ begin
74
+ URI.open(curr_image[:url]) do |image|
75
+ File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
76
+ file.write(image.read) # lưu hình ảnh
77
+ curr_image[:size] = image.size # cập nhật lại size trong mảng images
78
+ print "."
79
+ end
80
+ end # end open
81
+ rescue => exception
82
+ if timeout < 3
83
+ timeout += 1
84
+ retry
85
+ else
86
+ next
78
87
  end
79
- end # end open
88
+ end
80
89
  }
81
90
  end
82
91
  threads.each { |t| t.join }
@@ -84,7 +93,7 @@ module TomosiaAmanaplusCrawl
84
93
  end
85
94
 
86
95
  def writeToExcel(images, destination)
87
- path = "#{destination}/File Excel" # lưu file ở folder File Excel
96
+ path = "#{destination}//File Excel" # lưu file ở folder File Excel
88
97
  Dir.mkdir path unless File.exist? path
89
98
 
90
99
  book = Spreadsheet::Workbook.new
@@ -103,8 +112,3 @@ module TomosiaAmanaplusCrawl
103
112
 
104
113
  end
105
114
  end
106
-
107
- def TomosiaAmanaplusCrawl(keyword = "Yêu", destination = "chị", max = "My")
108
- puts "msg: #{keyword} #{destination} #{you}"
109
- # TomosiaAmanaplusCrawl::Crawler.new.run(keyword, destination, max)
110
- end
@@ -0,0 +1,14 @@
1
+ require 'thor'
2
+ require_relative '../tomosia_amanaplus_crawl'
3
+
4
+ module TomosiaAmanaplusCrawl
5
+ class Cli < Thor
6
+
7
+ desc "crawl KEYWORD", "enter KEYWORD to search"
8
+ option :destination
9
+ option :max
10
+ def crawl(keyword)
11
+ TomosiaAmanaplusCrawl::Crawler.new.run(keyword, options[:destination], options[:max].to_i)
12
+ end
13
+ end
14
+ end
@@ -1,3 +1,3 @@
1
1
  module TomosiaAmanaplusCrawl
2
- VERSION = "0.1.6"
2
+ VERSION = "0.2.2"
3
3
  end
@@ -0,0 +1,14 @@
1
+ require "bundler/setup"
2
+ require "tomosia_amanaplus_crawl"
3
+
4
+ RSpec.configure do |config|
5
+ # Enable flags like --only-failures and --next-failure
6
+ config.example_status_persistence_file_path = ".rspec_status"
7
+
8
+ # Disable RSpec exposing methods globally on `Module` and `main`
9
+ config.disable_monkey_patching!
10
+
11
+ config.expect_with :rspec do |c|
12
+ c.syntax = :expect
13
+ end
14
+ end
@@ -0,0 +1,9 @@
1
+ RSpec.describe TomosiaAmanaplusCrawl do
2
+ it "has a version number" do
3
+ expect(TomosiaAmanaplusCrawl::VERSION).not_to be nil
4
+ end
5
+
6
+ it "does something useful" do
7
+ expect(false).to eq(true)
8
+ end
9
+ end
@@ -9,13 +9,12 @@ Gem::Specification.new do |spec|
9
9
  spec.homepage = "https://github.com/tthuydang/tomosia_amanaplus_crawl"
10
10
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
11
11
 
12
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
13
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
14
- end
15
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
12
+ spec.files = `git ls-files`.split("\n")
13
+ spec.bindir = "exe"
14
+ spec.executables = 'tomosia_amanaplus_crawl'
16
15
  spec.require_paths = ["lib"]
17
16
 
18
- spec.add_runtime_dependency('httparty', '0.18.1')
19
17
  spec.add_runtime_dependency('nokogiri', '1.10.10')
20
18
  spec.add_runtime_dependency('spreadsheet', '1.2.6')
19
+ spec.add_runtime_dependency('thor')
21
20
  end
metadata CHANGED
@@ -1,60 +1,61 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomosia_amanaplus_crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nhat Huy
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-07 00:00:00.000000000 Z
11
+ date: 2020-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: httparty
14
+ name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.18.1
19
+ version: 1.10.10
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.18.1
26
+ version: 1.10.10
27
27
  - !ruby/object:Gem::Dependency
28
- name: nokogiri
28
+ name: spreadsheet
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - '='
32
32
  - !ruby/object:Gem::Version
33
- version: 1.10.10
33
+ version: 1.2.6
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - '='
39
39
  - !ruby/object:Gem::Version
40
- version: 1.10.10
40
+ version: 1.2.6
41
41
  - !ruby/object:Gem::Dependency
42
- name: spreadsheet
42
+ name: thor
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 1.2.6
47
+ version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: 1.2.6
54
+ version: '0'
55
55
  description:
56
56
  email:
57
- executables: []
57
+ executables:
58
+ - tomosia_amanaplus_crawl
58
59
  extensions: []
59
60
  extra_rdoc_files: []
60
61
  files:
@@ -69,8 +70,12 @@ files:
69
70
  - Rakefile
70
71
  - bin/console
71
72
  - bin/setup
73
+ - exe/tomosia_amanaplus_crawl
72
74
  - lib/tomosia_amanaplus_crawl.rb
75
+ - lib/tomosia_amanaplus_crawl/cli.rb
73
76
  - lib/tomosia_amanaplus_crawl/version.rb
77
+ - spec/spec_helper.rb
78
+ - spec/tomosia_amanaplus_crawl_spec.rb
74
79
  - tomosia_amanaplus_crawl.gemspec
75
80
  homepage: https://github.com/tthuydang/tomosia_amanaplus_crawl
76
81
  licenses: []