tomosia_amanaplus_crawl 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 793268d3d7e8d3c1f3fd17ca9f1283839dda2a20dfb1a8769c65fd6d25bf0236
4
- data.tar.gz: 115062f473eb2ca55c9ceea17265b73827b53feb548a48861b808b9baa64447f
3
+ metadata.gz: a3d62cab8ce5e9076b05594851cde7da03d627b8c386dfcbc57e64d561e7569a
4
+ data.tar.gz: 7ce113e8fde42d24405c662998c07cc6992a7d7e7ce94816590ef0a3f634ce0f
5
5
  SHA512:
6
- metadata.gz: 83ee2ae24471817f907373dd2130fd54726786eb2fca7732e9fbe10ec841b4b5cbfc3fa4925753d725eb23362c330cec055a8b43eb8f0321053e7ce497f77930
7
- data.tar.gz: 5b92ea21908fa8288a2fa26af61c8c198b525f0de6f6aba7766e89931f064edd0bd37059d2fc205b8090bf0aac2825865fa5fe9ce8226dd7003efb74f55ecf2b
6
+ metadata.gz: c4841b8f6948aa3a2afc7456380d26b374157ce5a453d9eb0bab6141ff5fb71b88cd3e6f51e0556cfb43421cef12c572a17a4a05f5d4d00404d19b8af452d079
7
+ data.tar.gz: 407ea07476e5fcbc4f356f540d975a4d15d36aab8ae02103e906575dc1153c257b2ada9f1d9309f39d9557e9109bfe464eae94680715143259af5b41ec6c4d74
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tomosia_amanaplus_crawl (0.1.5)
4
+ tomosia_amanaplus_crawl (0.1.8)
5
5
  httparty (= 0.18.1)
6
6
  nokogiri (= 1.10.10)
7
7
  spreadsheet (= 1.2.6)
8
+ thor
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
@@ -37,6 +38,7 @@ GEM
37
38
  ruby-ole (1.2.12.2)
38
39
  spreadsheet (1.2.6)
39
40
  ruby-ole (>= 1.0)
41
+ thor (1.0.1)
40
42
 
41
43
  PLATFORMS
42
44
  ruby
data/README.md CHANGED
@@ -23,9 +23,9 @@ Or install it yourself as:
23
23
  ## Usage
24
24
 
25
25
  ```ruby
26
- require 'tomosia_amanaplus_crawl'
27
- TomosiaAmanaplusCrawl::Crawler.new.run(keyword, path, max)
26
+ tomosia_amanaplus_crawl crawl "keyword" --destination "./lib" --max=123
28
27
  ```
28
+ Example: tomosia_amanaplus_crawl crawl "hoian" --destination "./" --max=123
29
29
  keyword: hoian, danang, ...
30
30
  path: './', '/desktop/', ...
31
31
  max: số lượng ảnh muốn lấy về. Nếu max lớn hơn tổng số ảnh các page thì vẫn lấy hết tất cả ảnh
data/Rakefile CHANGED
@@ -4,3 +4,9 @@ require "rspec/core/rake_task"
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
6
  task :default => :spec
7
+
8
+ namespace :gem do
9
+ task :build do
10
+ system "rake build && rake install"
11
+ end
12
+ end
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'tomosia_amanaplus_crawl/cli'
4
+
5
+ TomosiaAmanaplusCrawl::Cli.start
@@ -1,10 +1,6 @@
1
1
  require "tomosia_amanaplus_crawl/version"
2
2
 
3
3
  module TomosiaAmanaplusCrawl
4
- def self.yeuNgucLep
5
- puts "Yeu chi My nhieu lam"
6
- end
7
-
8
4
  require 'nokogiri'
9
5
  require 'httparty'
10
6
  require 'open-uri'
@@ -12,7 +8,7 @@ module TomosiaAmanaplusCrawl
12
8
  require 'spreadsheet'
13
9
 
14
10
  class Crawler
15
- URL = "https://plus.amanaimages.com/items/search/"
11
+ URL = "https://plus.amanaimages.com/items/search"
16
12
 
17
13
  def run(keyword, destination, max)
18
14
  unparsed_page = HTTParty.get("#{URL}/#{keyword}")
@@ -23,7 +19,7 @@ module TomosiaAmanaplusCrawl
23
19
 
24
20
  # lấy tổng số image
25
21
  total = parsed_page.css("h1.p-search-result__ttl").text.split(' ').first
26
- total = total[11..(total.length - 1)].chop.chop.chop.sub(',', '').to_i
22
+ total = total[(6 + keyword.length)..(total.length - 1)].chop.chop.chop.gsub(',', '').to_i
27
23
  if max > total # nếu max lớn hơn total thì max = total => vẫn lấy hết
28
24
  max = total
29
25
  end
@@ -74,13 +70,23 @@ module TomosiaAmanaplusCrawl
74
70
  print "\nDownloading"
75
71
  images.each do |curr_image|
76
72
  threads << Thread.new(curr_image) {
77
- open(curr_image[:url]) do |image|
78
- File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
79
- file.write(image.read) # lưu hình ảnh
80
- curr_image[:size] = image.size # cập nhật lại size trong mảng images
81
- print "."
73
+ timeout = 0
74
+ begin
75
+ open(curr_image[:url]) do |image|
76
+ File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
77
+ file.write(image.read) # lưu hình ảnh
78
+ curr_image[:size] = image.size # cập nhật lại size trong mảng images
79
+ print "."
80
+ end
81
+ end # end open
82
+ rescue => exception
83
+ if timeout < 3
84
+ timeout += 1
85
+ retry
86
+ else
87
+ next
82
88
  end
83
- end # end open
89
+ end
84
90
  }
85
91
  end
86
92
  threads.each { |t| t.join }
@@ -0,0 +1,14 @@
1
+ require 'thor'
2
+ require_relative '../tomosia_amanaplus_crawl'
3
+
4
+ module TomosiaAmanaplusCrawl
5
+ class Cli < Thor
6
+
7
+ desc "crawl KEYWORD", "enter KEYWORD to search"
8
+ option :destination
9
+ option :max
10
+ def crawl(keyword)
11
+ TomosiaAmanaplusCrawl::Crawler.new.run(keyword, options[:destination], options[:max].to_i)
12
+ end
13
+ end
14
+ end
@@ -1,3 +1,3 @@
1
1
  module TomosiaAmanaplusCrawl
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.9"
3
3
  end
@@ -0,0 +1,14 @@
1
+ require "bundler/setup"
2
+ require "tomosia_amanaplus_crawl"
3
+
4
+ RSpec.configure do |config|
5
+ # Enable flags like --only-failures and --next-failure
6
+ config.example_status_persistence_file_path = ".rspec_status"
7
+
8
+ # Disable RSpec exposing methods globally on `Module` and `main`
9
+ config.disable_monkey_patching!
10
+
11
+ config.expect_with :rspec do |c|
12
+ c.syntax = :expect
13
+ end
14
+ end
@@ -0,0 +1,9 @@
1
+ RSpec.describe TomosiaAmanaplusCrawl do
2
+ it "has a version number" do
3
+ expect(TomosiaAmanaplusCrawl::VERSION).not_to be nil
4
+ end
5
+
6
+ it "does something useful" do
7
+ expect(false).to eq(true)
8
+ end
9
+ end
@@ -9,13 +9,13 @@ Gem::Specification.new do |spec|
9
9
  spec.homepage = "https://github.com/tthuydang/tomosia_amanaplus_crawl"
10
10
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
11
11
 
12
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
13
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
14
- end
15
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
12
+ spec.files = `git ls-files`.split("\n")
13
+ spec.bindir = "exe"
14
+ spec.executables = 'tomosia_amanaplus_crawl'
16
15
  spec.require_paths = ["lib"]
17
16
 
18
17
  spec.add_runtime_dependency('httparty', '0.18.1')
19
18
  spec.add_runtime_dependency('nokogiri', '1.10.10')
20
19
  spec.add_runtime_dependency('spreadsheet', '1.2.6')
20
+ spec.add_runtime_dependency('thor')
21
21
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomosia_amanaplus_crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nhat Huy
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-07 00:00:00.000000000 Z
11
+ date: 2020-08-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -52,9 +52,24 @@ dependencies:
52
52
  - - '='
53
53
  - !ruby/object:Gem::Version
54
54
  version: 1.2.6
55
+ - !ruby/object:Gem::Dependency
56
+ name: thor
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  description:
56
70
  email:
57
- executables: []
71
+ executables:
72
+ - tomosia_amanaplus_crawl
58
73
  extensions: []
59
74
  extra_rdoc_files: []
60
75
  files:
@@ -69,8 +84,12 @@ files:
69
84
  - Rakefile
70
85
  - bin/console
71
86
  - bin/setup
87
+ - exe/tomosia_amanaplus_crawl
72
88
  - lib/tomosia_amanaplus_crawl.rb
89
+ - lib/tomosia_amanaplus_crawl/cli.rb
73
90
  - lib/tomosia_amanaplus_crawl/version.rb
91
+ - spec/spec_helper.rb
92
+ - spec/tomosia_amanaplus_crawl_spec.rb
74
93
  - tomosia_amanaplus_crawl.gemspec
75
94
  homepage: https://github.com/tthuydang/tomosia_amanaplus_crawl
76
95
  licenses: []