tomosia_amanaplus_crawl 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/tomosia_amanaplus_crawl.rb +3 -3
- data/lib/tomosia_amanaplus_crawl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0f30081ca8543573b8f1f16c2aaf77384d899152d4f5f424f5008dfe1c7c0fc
|
4
|
+
data.tar.gz: a819309522dd41b9346a0c43fe538bbd1b380b7524e0a3cac2dc00cdf4193d48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23afb7fba2307a0107b549a9e060fe4959de8f89dfad79cd63c0b0c4f2d7627ace2ef447b830ad7ad22c42ed9fb9a9b16b37a73d102386b01489a9046263d64d
|
7
|
+
data.tar.gz: a2d1201db75b05b6d80fa1a17fa4a0594283d92ad928c2dc22c214305877447ce2aa05685aa57e4bf6a06155416bda62e0d70a3b77dda9d02a8ace6714f09b74
|
data/README.md
CHANGED
@@ -23,7 +23,7 @@ Or install it yourself as:
|
|
23
23
|
## Usage
|
24
24
|
|
25
25
|
```ruby
|
26
|
-
tomosia_amanaplus_crawl crawl "keyword" --destination "
|
26
|
+
tomosia_amanaplus_crawl crawl "keyword" --destination "/home/usr/Documents" --max=123
|
27
27
|
```
|
28
28
|
Example: tomosia_amanaplus_crawl crawl "hoian" --destination "./" --max=123
|
29
29
|
keyword: hoian, danang, ...
|
data/lib/tomosia_amanaplus_crawl.rb
CHANGED
@@ -11,7 +11,7 @@ module TomosiaAmanaplusCrawl
|
|
11
11
|
URL = "https://plus.amanaimages.com/items/search"
|
12
12
|
|
13
13
|
def run(keyword, destination, max)
|
14
|
-
unparsed_page =
|
14
|
+
unparsed_page = open("#{URL}/#{keyword}").read
|
15
15
|
parsed_page = Nokogiri::HTML(unparsed_page)
|
16
16
|
|
17
17
|
pages = parsed_page.css("div.c-paginate__nums").css('a').last.text.to_i # tổng số page
|
@@ -36,7 +36,7 @@ module TomosiaAmanaplusCrawl
|
|
36
36
|
while curr_page <= pages
|
37
37
|
puts "Crawling page #{curr_page}..........."
|
38
38
|
|
39
|
-
pagination_unparsed_page =
|
39
|
+
pagination_unparsed_page = open("#{URL}/#{keyword}?page=#{curr_page}").read
|
40
40
|
pagination_parsed_page = Nokogiri::HTML(pagination_unparsed_page)
|
41
41
|
pagination_images_listings = pagination_parsed_page.css("div.p-item-thumb")
|
42
42
|
|
@@ -72,7 +72,7 @@ module TomosiaAmanaplusCrawl
|
|
72
72
|
threads << Thread.new(curr_image) {
|
73
73
|
timeout = 0
|
74
74
|
begin
|
75
|
-
open(curr_image[:url]) do |image|
|
75
|
+
URI.open(curr_image[:url]) do |image|
|
76
76
|
File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
|
77
77
|
file.write(image.read) # lưu hình ảnh
|
78
78
|
curr_image[:size] = image.size # cập nhật lại size trong mảng images
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomosia_amanaplus_crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nhat Huy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|