tomosia_amanaplus_crawl 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/tomosia_amanaplus_crawl.rb +3 -3
- data/lib/tomosia_amanaplus_crawl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0f30081ca8543573b8f1f16c2aaf77384d899152d4f5f424f5008dfe1c7c0fc
|
4
|
+
data.tar.gz: a819309522dd41b9346a0c43fe538bbd1b380b7524e0a3cac2dc00cdf4193d48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23afb7fba2307a0107b549a9e060fe4959de8f89dfad79cd63c0b0c4f2d7627ace2ef447b830ad7ad22c42ed9fb9a9b16b37a73d102386b01489a9046263d64d
|
7
|
+
data.tar.gz: a2d1201db75b05b6d80fa1a17fa4a0594283d92ad928c2dc22c214305877447ce2aa05685aa57e4bf6a06155416bda62e0d70a3b77dda9d02a8ace6714f09b74
|
data/README.md
CHANGED
@@ -23,7 +23,7 @@ Or install it yourself as:
|
|
23
23
|
## Usage
|
24
24
|
|
25
25
|
```ruby
|
26
|
-
tomosia_amanaplus_crawl crawl "keyword" --destination "
|
26
|
+
tomosia_amanaplus_crawl crawl "keyword" --destination "/home/usr/Documents" --max=123
|
27
27
|
```
|
28
28
|
Example: tomosia_amanaplus_crawl crawl "hoian" --destination "./" --max=123
|
29
29
|
keyword: hoian, danang, ...
|
data/lib/tomosia_amanaplus_crawl.rb
CHANGED
@@ -11,7 +11,7 @@ module TomosiaAmanaplusCrawl
|
|
11
11
|
URL = "https://plus.amanaimages.com/items/search"
|
12
12
|
|
13
13
|
def run(keyword, destination, max)
|
14
|
-
unparsed_page =
|
14
|
+
unparsed_page = open("#{URL}/#{keyword}").read
|
15
15
|
parsed_page = Nokogiri::HTML(unparsed_page)
|
16
16
|
|
17
17
|
pages = parsed_page.css("div.c-paginate__nums").css('a').last.text.to_i # tổng số page
|
@@ -36,7 +36,7 @@ module TomosiaAmanaplusCrawl
|
|
36
36
|
while curr_page <= pages
|
37
37
|
puts "Crawling page #{curr_page}..........."
|
38
38
|
|
39
|
-
pagination_unparsed_page =
|
39
|
+
pagination_unparsed_page = open("#{URL}/#{keyword}?page=#{curr_page}").read
|
40
40
|
pagination_parsed_page = Nokogiri::HTML(pagination_unparsed_page)
|
41
41
|
pagination_images_listings = pagination_parsed_page.css("div.p-item-thumb")
|
42
42
|
|
@@ -72,7 +72,7 @@ module TomosiaAmanaplusCrawl
|
|
72
72
|
threads << Thread.new(curr_image) {
|
73
73
|
timeout = 0
|
74
74
|
begin
|
75
|
-
open(curr_image[:url]) do |image|
|
75
|
+
URI.open(curr_image[:url]) do |image|
|
76
76
|
File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
|
77
77
|
file.write(image.read) # lưu hình ảnh
|
78
78
|
curr_image[:size] = image.size # cập nhật lại size trong mảng images
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomosia_amanaplus_crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nhat Huy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|