tomosia_amanaplus_crawl 0.1.4 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2832ce228e9bc2ebcce58c1dde236cc0f6bfa4c789654e976cb66defe368fb39
4
- data.tar.gz: 790070002318d2d3c3727b9fd3986b655624579d3130e32e0ba6c88bd65485cd
3
+ metadata.gz: a3d62cab8ce5e9076b05594851cde7da03d627b8c386dfcbc57e64d561e7569a
4
+ data.tar.gz: 7ce113e8fde42d24405c662998c07cc6992a7d7e7ce94816590ef0a3f634ce0f
5
5
  SHA512:
6
- metadata.gz: 8b174ec110e8933aae815abef670ae0a33ae69077b98268522aaf83a52702f7d3c303cc9bbf73d5e8ec7d27a9cb2deb1cd1d05e6edfc9b5ecff8fa94b49af4d9
7
- data.tar.gz: badfef5587e0b7989a313caf4490f59efba61d9e743f16126ae1bd7915abd7d878be1467bbcef967c4968c590c7c4383f542c0a8a51f5bf07783759ae4b216da
6
+ metadata.gz: c4841b8f6948aa3a2afc7456380d26b374157ce5a453d9eb0bab6141ff5fb71b88cd3e6f51e0556cfb43421cef12c572a17a4a05f5d4d00404d19b8af452d079
7
+ data.tar.gz: 407ea07476e5fcbc4f356f540d975a4d15d36aab8ae02103e906575dc1153c257b2ada9f1d9309f39d9557e9109bfe464eae94680715143259af5b41ec6c4d74
data/Gemfile CHANGED
@@ -5,7 +5,3 @@ gemspec
5
5
 
6
6
  gem "rake", "~> 12.0"
7
7
  gem "rspec", "~> 3.0"
8
-
9
- gem 'httparty'
10
- gem 'nokogiri'
11
- gem 'spreadsheet'
data/Gemfile.lock ADDED
@@ -0,0 +1,52 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ tomosia_amanaplus_crawl (0.1.8)
5
+ httparty (= 0.18.1)
6
+ nokogiri (= 1.10.10)
7
+ spreadsheet (= 1.2.6)
8
+ thor
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ diff-lcs (1.4.4)
14
+ httparty (0.18.1)
15
+ mime-types (~> 3.0)
16
+ multi_xml (>= 0.5.2)
17
+ mime-types (3.3.1)
18
+ mime-types-data (~> 3.2015)
19
+ mime-types-data (3.2020.0512)
20
+ mini_portile2 (2.4.0)
21
+ multi_xml (0.6.0)
22
+ nokogiri (1.10.10)
23
+ mini_portile2 (~> 2.4.0)
24
+ rake (12.3.3)
25
+ rspec (3.9.0)
26
+ rspec-core (~> 3.9.0)
27
+ rspec-expectations (~> 3.9.0)
28
+ rspec-mocks (~> 3.9.0)
29
+ rspec-core (3.9.2)
30
+ rspec-support (~> 3.9.3)
31
+ rspec-expectations (3.9.2)
32
+ diff-lcs (>= 1.2.0, < 2.0)
33
+ rspec-support (~> 3.9.0)
34
+ rspec-mocks (3.9.1)
35
+ diff-lcs (>= 1.2.0, < 2.0)
36
+ rspec-support (~> 3.9.0)
37
+ rspec-support (3.9.3)
38
+ ruby-ole (1.2.12.2)
39
+ spreadsheet (1.2.6)
40
+ ruby-ole (>= 1.0)
41
+ thor (1.0.1)
42
+
43
+ PLATFORMS
44
+ ruby
45
+
46
+ DEPENDENCIES
47
+ rake (~> 12.0)
48
+ rspec (~> 3.0)
49
+ tomosia_amanaplus_crawl!
50
+
51
+ BUNDLED WITH
52
+ 2.1.4
data/README.md CHANGED
@@ -10,9 +10,6 @@ Add this line to your application's Gemfile:
10
10
 
11
11
  ```ruby
12
12
  gem 'tomosia_amanaplus_crawl'
13
- gem 'httparty'
14
- gem 'nokogiri'
15
- gem 'spreadsheet'
16
13
  ```
17
14
 
18
15
  And then execute:
@@ -26,9 +23,9 @@ Or install it yourself as:
26
23
  ## Usage
27
24
 
28
25
  ```ruby
29
- require 'tomosia_amanaplus_crawl'
30
- TomosiaAmanaplusCrawl::Crawler.new.run(keyword, path, max)
26
+ tomosia_amanaplus_crawl crawl "keyword" --destination "./lib" --max=123
31
27
  ```
28
+ Example: tomosia_amanaplus_crawl crawl "hoian" --destination "./" --max=123
32
29
  keyword: hoian, danang, ...
33
30
  path: './', '/desktop/', ...
34
31
  max: số lượng ảnh muốn lấy về. Nếu max lớn hơn tổng số ảnh các page thì vẫn lấy hết tất cả ảnh
data/Rakefile CHANGED
@@ -4,3 +4,9 @@ require "rspec/core/rake_task"
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
6
  task :default => :spec
7
+
8
+ namespace :gem do
9
+ task :build do
10
+ system "rake build && rake install"
11
+ end
12
+ end
data/exe/tomosia_amanaplus_crawl ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'tomosia_amanaplus_crawl/cli'
4
+
5
+ TomosiaAmanaplusCrawl::Cli.start
data/lib/tomosia_amanaplus_crawl.rb CHANGED
@@ -8,7 +8,7 @@ module TomosiaAmanaplusCrawl
8
8
  require 'spreadsheet'
9
9
 
10
10
  class Crawler
11
- URL = "https://plus.amanaimages.com/items/search/"
11
+ URL = "https://plus.amanaimages.com/items/search"
12
12
 
13
13
  def run(keyword, destination, max)
14
14
  unparsed_page = HTTParty.get("#{URL}/#{keyword}")
@@ -19,7 +19,7 @@ module TomosiaAmanaplusCrawl
19
19
 
20
20
  # lấy tổng số image
21
21
  total = parsed_page.css("h1.p-search-result__ttl").text.split(' ').first
22
- total = total[11..(total.length - 1)].chop.chop.chop.sub(',', '').to_i
22
+ total = total[(6 + keyword.length)..(total.length - 1)].chop.chop.chop.gsub(',', '').to_i
23
23
  if max > total # nếu max lớn hơn total thì max = total => vẫn lấy hết
24
24
  max = total
25
25
  end
@@ -70,13 +70,23 @@ module TomosiaAmanaplusCrawl
70
70
  print "\nDownloading"
71
71
  images.each do |curr_image|
72
72
  threads << Thread.new(curr_image) {
73
- open(curr_image[:url]) do |image|
74
- File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
75
- file.write(image.read) # lưu hình ảnh
76
- curr_image[:size] = image.size # cập nhật lại size trong mảng images
77
- print "."
73
+ timeout = 0
74
+ begin
75
+ open(curr_image[:url]) do |image|
76
+ File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
77
+ file.write(image.read) # lưu hình ảnh
78
+ curr_image[:size] = image.size # cập nhật lại size trong mảng images
79
+ print "."
80
+ end
81
+ end # end open
82
+ rescue => exception
83
+ if timeout < 3
84
+ timeout += 1
85
+ retry
86
+ else
87
+ next
78
88
  end
79
- end # end open
89
+ end
80
90
  }
81
91
  end
82
92
  threads.each { |t| t.join }
data/lib/tomosia_amanaplus_crawl/cli.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'thor'
2
+ require_relative '../tomosia_amanaplus_crawl'
3
+
4
+ module TomosiaAmanaplusCrawl
5
+ class Cli < Thor
6
+
7
+ desc "crawl KEYWORD", "enter KEYWORD to search"
8
+ option :destination
9
+ option :max
10
+ def crawl(keyword)
11
+ TomosiaAmanaplusCrawl::Crawler.new.run(keyword, options[:destination], options[:max].to_i)
12
+ end
13
+ end
14
+ end
data/lib/tomosia_amanaplus_crawl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TomosiaAmanaplusCrawl
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.9"
3
3
  end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,14 @@
1
+ require "bundler/setup"
2
+ require "tomosia_amanaplus_crawl"
3
+
4
+ RSpec.configure do |config|
5
+ # Enable flags like --only-failures and --next-failure
6
+ config.example_status_persistence_file_path = ".rspec_status"
7
+
8
+ # Disable RSpec exposing methods globally on `Module` and `main`
9
+ config.disable_monkey_patching!
10
+
11
+ config.expect_with :rspec do |c|
12
+ c.syntax = :expect
13
+ end
14
+ end
data/spec/tomosia_amanaplus_crawl_spec.rb ADDED
@@ -0,0 +1,9 @@
1
+ RSpec.describe TomosiaAmanaplusCrawl do
2
+ it "has a version number" do
3
+ expect(TomosiaAmanaplusCrawl::VERSION).not_to be nil
4
+ end
5
+
6
+ it "does something useful" do
7
+ expect(false).to eq(true)
8
+ end
9
+ end
data/tomosia_amanaplus_crawl.gemspec CHANGED
@@ -9,9 +9,13 @@ Gem::Specification.new do |spec|
9
9
  spec.homepage = "https://github.com/tthuydang/tomosia_amanaplus_crawl"
10
10
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
11
11
 
12
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
13
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
14
- end
15
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
12
+ spec.files = `git ls-files`.split("\n")
13
+ spec.bindir = "exe"
14
+ spec.executables = 'tomosia_amanaplus_crawl'
16
15
  spec.require_paths = ["lib"]
16
+
17
+ spec.add_runtime_dependency('httparty', '0.18.1')
18
+ spec.add_runtime_dependency('nokogiri', '1.10.10')
19
+ spec.add_runtime_dependency('spreadsheet', '1.2.6')
20
+ spec.add_runtime_dependency('thor')
17
21
  end
metadata CHANGED
@@ -1,18 +1,75 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomosia_amanaplus_crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nhat Huy
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-06 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2020-08-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: httparty
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.18.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.18.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.10.10
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.10.10
41
+ - !ruby/object:Gem::Dependency
42
+ name: spreadsheet
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.2.6
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.2.6
55
+ - !ruby/object:Gem::Dependency
56
+ name: thor
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
13
69
  description:
14
70
  email:
15
- executables: []
71
+ executables:
72
+ - tomosia_amanaplus_crawl
16
73
  extensions: []
17
74
  extra_rdoc_files: []
18
75
  files:
@@ -21,13 +78,18 @@ files:
21
78
  - ".travis.yml"
22
79
  - CODE_OF_CONDUCT.md
23
80
  - Gemfile
81
+ - Gemfile.lock
24
82
  - LICENSE.txt
25
83
  - README.md
26
84
  - Rakefile
27
85
  - bin/console
28
86
  - bin/setup
87
+ - exe/tomosia_amanaplus_crawl
29
88
  - lib/tomosia_amanaplus_crawl.rb
89
+ - lib/tomosia_amanaplus_crawl/cli.rb
30
90
  - lib/tomosia_amanaplus_crawl/version.rb
91
+ - spec/spec_helper.rb
92
+ - spec/tomosia_amanaplus_crawl_spec.rb
31
93
  - tomosia_amanaplus_crawl.gemspec
32
94
  homepage: https://github.com/tthuydang/tomosia_amanaplus_crawl
33
95
  licenses: []