tomosia_amanaplus_crawl 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e3bc628d6f82c6357d3a4362464870e6e1ac3bc696ad7bc3f91fd7fd5901b7fe
4
- data.tar.gz: 7184c0c7edaecb96312ab0650fb4395cc02d50175e51c0d35ce2600e6c9ac61b
3
+ metadata.gz: 78adb558a362c9594df8a864dad75bf4bc95472eb64e2aa9ef1499d2a39f6837
4
+ data.tar.gz: d2d670dbc023aa1f1ce265f78245b18a2177da373b0e4436a8a88ced97f9677b
5
5
  SHA512:
6
- metadata.gz: ab5bc7f49db71fb490d1cf99da244c1321edcf98aec5678415229f4faf6a29fa5edd2589d8abf90b66790e142c1ef276a8e24bb65755c6b2eb5f2ea805dff1f6
7
- data.tar.gz: b89334b70ee3693add75baa2e49bff8e2b968c1caac0d21adea45c41a31984b21e713127d8412cde046a478d714da2bd3a9186e0838923c964262967c1f60bf4
6
+ metadata.gz: 382535d1072a6803ffd0166ee70b99187514b9119656e94c852519ef3c608ed6d829a86d19d4d8a20027a6fc1e1a3fe909defc4644e139002998751a40bb124c
7
+ data.tar.gz: d18803def75f4efa16e4e4339e3f0b1e64a6a54f77c4a8a6b45d5cb8991e6a3e5a87a0c4d1047c22ad8fdab4266c1c3dedac07660663aeb6557624266e6c6809
@@ -1,8 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tomosia_amanaplus_crawl (0.1.8)
5
- httparty (= 0.18.1)
4
+ tomosia_amanaplus_crawl (0.2.3)
6
5
  nokogiri (= 1.10.10)
7
6
  spreadsheet (= 1.2.6)
8
7
  thor
@@ -11,14 +10,7 @@ GEM
11
10
  remote: https://rubygems.org/
12
11
  specs:
13
12
  diff-lcs (1.4.4)
14
- httparty (0.18.1)
15
- mime-types (~> 3.0)
16
- multi_xml (>= 0.5.2)
17
- mime-types (3.3.1)
18
- mime-types-data (~> 3.2015)
19
- mime-types-data (3.2020.0512)
20
13
  mini_portile2 (2.4.0)
21
- multi_xml (0.6.0)
22
14
  nokogiri (1.10.10)
23
15
  mini_portile2 (~> 2.4.0)
24
16
  rake (12.3.3)
@@ -15,11 +15,11 @@ module TomosiaAmanaplusCrawl
15
15
 
16
16
  pages = parsed_page.css("div.c-paginate__nums").css('a').last.text.to_i # tổng số page
17
17
  images_listings = parsed_page.css("div.p-search-result__body") # danh sách các thẻ div chứa image
18
-
18
+
19
19
  # lấy tổng số image
20
20
  total = parsed_page.css("h1.p-search-result__ttl").text.split(' ').first
21
21
  total = total[(6 + keyword.length)..(total.length - 1)].chop.chop.chop.gsub(',', '').to_i
22
- if max > total # nếu max lớn hơn total thì max = total => vẫn lấy hết
22
+ if max == nil || max > total # nếu max lớn hơn total thì max = total => vẫn lấy hết
23
23
  max = total
24
24
  end
25
25
 
@@ -34,11 +34,11 @@ module TomosiaAmanaplusCrawl
34
34
  curr_index = 1
35
35
  while curr_page <= pages
36
36
  puts "Crawling page #{curr_page}..........."
37
-
37
+
38
38
  pagination_unparsed_page = open("#{URL}/#{keyword}?page=#{curr_page}").read
39
39
  pagination_parsed_page = Nokogiri::HTML(pagination_unparsed_page)
40
40
  pagination_images_listings = pagination_parsed_page.css("div.p-item-thumb")
41
-
41
+
42
42
  pagination_images_listings.each do |img|
43
43
  if curr_index > max
44
44
  return images
@@ -54,7 +54,7 @@ module TomosiaAmanaplusCrawl
54
54
  images << current_image
55
55
  curr_index += 1
56
56
  end
57
-
57
+
58
58
  curr_page += 1
59
59
  end
60
60
  images
@@ -75,7 +75,7 @@ module TomosiaAmanaplusCrawl
75
75
  File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
76
76
  file.write(image.read) # lưu hình ảnh
77
77
  curr_image[:size] = image.size # cập nhật lại size trong mảng images
78
- print "."
78
+ print '.'
79
79
  end
80
80
  end # end open
81
81
  rescue => exception
@@ -99,10 +99,9 @@ module TomosiaAmanaplusCrawl
99
99
  book = Spreadsheet::Workbook.new
100
100
  sheet1 = book.create_worksheet
101
101
 
102
- i = 0
103
102
  sheet1.row(0).concat %w{Title Url Size(bytes) Extension}
104
103
  puts "Writing..........."
105
- images.each do |img|
104
+ images.each_with_index do |img, i|
106
105
  sheet1.row(i += 1).push img[:title], img[:url], img[:size], img[:extension]
107
106
  end
108
107
  puts "Writed."
@@ -8,7 +8,7 @@ module TomosiaAmanaplusCrawl
8
8
  option :destination
9
9
  option :max
10
10
  def crawl(keyword)
11
- TomosiaAmanaplusCrawl::Crawler.new.run(keyword, options[:destination], options[:max].to_i)
11
+ TomosiaAmanaplusCrawl::Crawler.new.run(keyword, options[:destination], options[:max] == nil ? nil : options[:max].to_i)
12
12
  end
13
13
  end
14
14
  end
@@ -1,3 +1,3 @@
1
1
  module TomosiaAmanaplusCrawl
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomosia_amanaplus_crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nhat Huy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-10 00:00:00.000000000 Z
11
+ date: 2020-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri