tomosia_amanaplus_crawl 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3d62cab8ce5e9076b05594851cde7da03d627b8c386dfcbc57e64d561e7569a
4
- data.tar.gz: 7ce113e8fde42d24405c662998c07cc6992a7d7e7ce94816590ef0a3f634ce0f
3
+ metadata.gz: c0f30081ca8543573b8f1f16c2aaf77384d899152d4f5f424f5008dfe1c7c0fc
4
+ data.tar.gz: a819309522dd41b9346a0c43fe538bbd1b380b7524e0a3cac2dc00cdf4193d48
5
5
  SHA512:
6
- metadata.gz: c4841b8f6948aa3a2afc7456380d26b374157ce5a453d9eb0bab6141ff5fb71b88cd3e6f51e0556cfb43421cef12c572a17a4a05f5d4d00404d19b8af452d079
7
- data.tar.gz: 407ea07476e5fcbc4f356f540d975a4d15d36aab8ae02103e906575dc1153c257b2ada9f1d9309f39d9557e9109bfe464eae94680715143259af5b41ec6c4d74
6
+ metadata.gz: 23afb7fba2307a0107b549a9e060fe4959de8f89dfad79cd63c0b0c4f2d7627ace2ef447b830ad7ad22c42ed9fb9a9b16b37a73d102386b01489a9046263d64d
7
+ data.tar.gz: a2d1201db75b05b6d80fa1a17fa4a0594283d92ad928c2dc22c214305877447ce2aa05685aa57e4bf6a06155416bda62e0d70a3b77dda9d02a8ace6714f09b74
data/README.md CHANGED
@@ -23,7 +23,7 @@ Or install it yourself as:
23
23
  ## Usage
24
24
 
25
25
  ```ruby
26
- tomosia_amanaplus_crawl crawl "keyword" --destination "./lib" --max=123
26
+ tomosia_amanaplus_crawl crawl "keyword" --destination "/home/usr/Documents" --max=123
27
27
  ```
28
28
  Example: tomosia_amanaplus_crawl crawl "hoian" --destination "./" --max=123
29
29
  keyword: hoian, danang, ...
@@ -11,7 +11,7 @@ module TomosiaAmanaplusCrawl
11
11
  URL = "https://plus.amanaimages.com/items/search"
12
12
 
13
13
  def run(keyword, destination, max)
14
- unparsed_page = HTTParty.get("#{URL}/#{keyword}")
14
+ unparsed_page = open("#{URL}/#{keyword}").read
15
15
  parsed_page = Nokogiri::HTML(unparsed_page)
16
16
 
17
17
  pages = parsed_page.css("div.c-paginate__nums").css('a').last.text.to_i # tổng số page
@@ -36,7 +36,7 @@ module TomosiaAmanaplusCrawl
36
36
  while curr_page <= pages
37
37
  puts "Crawling page #{curr_page}..........."
38
38
 
39
- pagination_unparsed_page = HTTParty.get("https://plus.amanaimages.com/items/search/#{keyword}?page=#{curr_page}")
39
+ pagination_unparsed_page = open("#{URL}/#{keyword}?page=#{curr_page}").read
40
40
  pagination_parsed_page = Nokogiri::HTML(pagination_unparsed_page)
41
41
  pagination_images_listings = pagination_parsed_page.css("div.p-item-thumb")
42
42
 
@@ -72,7 +72,7 @@ module TomosiaAmanaplusCrawl
72
72
  threads << Thread.new(curr_image) {
73
73
  timeout = 0
74
74
  begin
75
- open(curr_image[:url]) do |image|
75
+ URI.open(curr_image[:url]) do |image|
76
76
  File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
77
77
  file.write(image.read) # lưu hình ảnh
78
78
  curr_image[:size] = image.size # cập nhật lại size trong mảng images
@@ -1,3 +1,3 @@
1
1
  module TomosiaAmanaplusCrawl
2
- VERSION = "0.1.9"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomosia_amanaplus_crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nhat Huy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-08 00:00:00.000000000 Z
11
+ date: 2020-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty