tomosia_amanaplus_crawl 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -1
- data/README.md +2 -2
- data/Rakefile +6 -0
- data/exe/tomosia_amanaplus_crawl +5 -0
- data/lib/tomosia_amanaplus_crawl.rb +18 -12
- data/lib/tomosia_amanaplus_crawl/cli.rb +14 -0
- data/lib/tomosia_amanaplus_crawl/version.rb +1 -1
- data/spec/spec_helper.rb +14 -0
- data/spec/tomosia_amanaplus_crawl_spec.rb +9 -0
- data/tomosia_amanaplus_crawl.gemspec +4 -4
- metadata +23 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3d62cab8ce5e9076b05594851cde7da03d627b8c386dfcbc57e64d561e7569a
|
4
|
+
data.tar.gz: 7ce113e8fde42d24405c662998c07cc6992a7d7e7ce94816590ef0a3f634ce0f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4841b8f6948aa3a2afc7456380d26b374157ce5a453d9eb0bab6141ff5fb71b88cd3e6f51e0556cfb43421cef12c572a17a4a05f5d4d00404d19b8af452d079
|
7
|
+
data.tar.gz: 407ea07476e5fcbc4f356f540d975a4d15d36aab8ae02103e906575dc1153c257b2ada9f1d9309f39d9557e9109bfe464eae94680715143259af5b41ec6c4d74
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
tomosia_amanaplus_crawl (0.1.
|
4
|
+
tomosia_amanaplus_crawl (0.1.8)
|
5
5
|
httparty (= 0.18.1)
|
6
6
|
nokogiri (= 1.10.10)
|
7
7
|
spreadsheet (= 1.2.6)
|
8
|
+
thor
|
8
9
|
|
9
10
|
GEM
|
10
11
|
remote: https://rubygems.org/
|
@@ -37,6 +38,7 @@ GEM
|
|
37
38
|
ruby-ole (1.2.12.2)
|
38
39
|
spreadsheet (1.2.6)
|
39
40
|
ruby-ole (>= 1.0)
|
41
|
+
thor (1.0.1)
|
40
42
|
|
41
43
|
PLATFORMS
|
42
44
|
ruby
|
data/README.md
CHANGED
@@ -23,9 +23,9 @@ Or install it yourself as:
|
|
23
23
|
## Usage
|
24
24
|
|
25
25
|
```ruby
|
26
|
-
|
27
|
-
TomosiaAmanaplusCrawl::Crawler.new.run(keyword, path, max)
|
26
|
+
tomosia_amanaplus_crawl crawl "keyword" --destination "./lib" --max=123
|
28
27
|
```
|
28
|
+
Example: tomosia_amanaplus_crawl crawl "hoian" --destination "./" --max=123
|
29
29
|
keyword: hoian, danang, ...
|
30
30
|
path: './', '/desktop/', ...
|
31
31
|
max: số lượng ảnh muốn lấy về. Nếu max lớn hơn tổng số ảnh các page thì vẫn lấy hết tất cả ảnh
|
data/lib/tomosia_amanaplus_crawl.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
require "tomosia_amanaplus_crawl/version"
|
2
2
|
|
3
3
|
module TomosiaAmanaplusCrawl
|
4
|
-
def self.yeuNgucLep
|
5
|
-
puts "Yeu chi My nhieu lam"
|
6
|
-
end
|
7
|
-
|
8
4
|
require 'nokogiri'
|
9
5
|
require 'httparty'
|
10
6
|
require 'open-uri'
|
@@ -12,7 +8,7 @@ module TomosiaAmanaplusCrawl
|
|
12
8
|
require 'spreadsheet'
|
13
9
|
|
14
10
|
class Crawler
|
15
|
-
URL = "https://plus.amanaimages.com/items/search
|
11
|
+
URL = "https://plus.amanaimages.com/items/search"
|
16
12
|
|
17
13
|
def run(keyword, destination, max)
|
18
14
|
unparsed_page = HTTParty.get("#{URL}/#{keyword}")
|
@@ -23,7 +19,7 @@ module TomosiaAmanaplusCrawl
|
|
23
19
|
|
24
20
|
# lấy tổng số image
|
25
21
|
total = parsed_page.css("h1.p-search-result__ttl").text.split(' ').first
|
26
|
-
total = total[
|
22
|
+
total = total[(6 + keyword.length)..(total.length - 1)].chop.chop.chop.gsub(',', '').to_i
|
27
23
|
if max > total # nếu max lớn hơn total thì max = total => vẫn lấy hết
|
28
24
|
max = total
|
29
25
|
end
|
@@ -74,13 +70,23 @@ module TomosiaAmanaplusCrawl
|
|
74
70
|
print "\nDownloading"
|
75
71
|
images.each do |curr_image|
|
76
72
|
threads << Thread.new(curr_image) {
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
curr_image[:
|
81
|
-
|
73
|
+
timeout = 0
|
74
|
+
begin
|
75
|
+
open(curr_image[:url]) do |image|
|
76
|
+
File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
|
77
|
+
file.write(image.read) # lưu hình ảnh
|
78
|
+
curr_image[:size] = image.size # cập nhật lại size trong mảng images
|
79
|
+
print "."
|
80
|
+
end
|
81
|
+
end # end open
|
82
|
+
rescue => exception
|
83
|
+
if timeout < 3
|
84
|
+
timeout += 1
|
85
|
+
retry
|
86
|
+
else
|
87
|
+
next
|
82
88
|
end
|
83
|
-
end
|
89
|
+
end
|
84
90
|
}
|
85
91
|
end
|
86
92
|
threads.each { |t| t.join }
|
data/lib/tomosia_amanaplus_crawl/cli.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require_relative '../tomosia_amanaplus_crawl'
|
3
|
+
|
4
|
+
module TomosiaAmanaplusCrawl
|
5
|
+
class Cli < Thor
|
6
|
+
|
7
|
+
desc "crawl KEYWORD", "enter KEYWORD to search"
|
8
|
+
option :destination
|
9
|
+
option :max
|
10
|
+
def crawl(keyword)
|
11
|
+
TomosiaAmanaplusCrawl::Crawler.new.run(keyword, options[:destination], options[:max].to_i)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "bundler/setup"
|
2
|
+
require "tomosia_amanaplus_crawl"
|
3
|
+
|
4
|
+
RSpec.configure do |config|
|
5
|
+
# Enable flags like --only-failures and --next-failure
|
6
|
+
config.example_status_persistence_file_path = ".rspec_status"
|
7
|
+
|
8
|
+
# Disable RSpec exposing methods globally on `Module` and `main`
|
9
|
+
config.disable_monkey_patching!
|
10
|
+
|
11
|
+
config.expect_with :rspec do |c|
|
12
|
+
c.syntax = :expect
|
13
|
+
end
|
14
|
+
end
|
data/tomosia_amanaplus_crawl.gemspec
CHANGED
@@ -9,13 +9,13 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.homepage = "https://github.com/tthuydang/tomosia_amanaplus_crawl"
|
10
10
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
11
11
|
|
12
|
-
spec.files =
|
13
|
-
|
14
|
-
|
15
|
-
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
12
|
+
spec.files = `git ls-files`.split("\n")
|
13
|
+
spec.bindir = "exe"
|
14
|
+
spec.executables = 'tomosia_amanaplus_crawl'
|
16
15
|
spec.require_paths = ["lib"]
|
17
16
|
|
18
17
|
spec.add_runtime_dependency('httparty', '0.18.1')
|
19
18
|
spec.add_runtime_dependency('nokogiri', '1.10.10')
|
20
19
|
spec.add_runtime_dependency('spreadsheet', '1.2.6')
|
20
|
+
spec.add_runtime_dependency('thor')
|
21
21
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomosia_amanaplus_crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nhat Huy
|
8
8
|
autorequire:
|
9
|
-
bindir:
|
9
|
+
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -52,9 +52,24 @@ dependencies:
|
|
52
52
|
- - '='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 1.2.6
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: thor
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
description:
|
56
70
|
email:
|
57
|
-
executables:
|
71
|
+
executables:
|
72
|
+
- tomosia_amanaplus_crawl
|
58
73
|
extensions: []
|
59
74
|
extra_rdoc_files: []
|
60
75
|
files:
|
@@ -69,8 +84,12 @@ files:
|
|
69
84
|
- Rakefile
|
70
85
|
- bin/console
|
71
86
|
- bin/setup
|
87
|
+
- exe/tomosia_amanaplus_crawl
|
72
88
|
- lib/tomosia_amanaplus_crawl.rb
|
89
|
+
- lib/tomosia_amanaplus_crawl/cli.rb
|
73
90
|
- lib/tomosia_amanaplus_crawl/version.rb
|
91
|
+
- spec/spec_helper.rb
|
92
|
+
- spec/tomosia_amanaplus_crawl_spec.rb
|
74
93
|
- tomosia_amanaplus_crawl.gemspec
|
75
94
|
homepage: https://github.com/tthuydang/tomosia_amanaplus_crawl
|
76
95
|
licenses: []
|