tomosia_amanaplus_crawl 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e3bc628d6f82c6357d3a4362464870e6e1ac3bc696ad7bc3f91fd7fd5901b7fe
4
- data.tar.gz: 7184c0c7edaecb96312ab0650fb4395cc02d50175e51c0d35ce2600e6c9ac61b
3
+ metadata.gz: 78adb558a362c9594df8a864dad75bf4bc95472eb64e2aa9ef1499d2a39f6837
4
+ data.tar.gz: d2d670dbc023aa1f1ce265f78245b18a2177da373b0e4436a8a88ced97f9677b
5
5
  SHA512:
6
- metadata.gz: ab5bc7f49db71fb490d1cf99da244c1321edcf98aec5678415229f4faf6a29fa5edd2589d8abf90b66790e142c1ef276a8e24bb65755c6b2eb5f2ea805dff1f6
7
- data.tar.gz: b89334b70ee3693add75baa2e49bff8e2b968c1caac0d21adea45c41a31984b21e713127d8412cde046a478d714da2bd3a9186e0838923c964262967c1f60bf4
6
+ metadata.gz: 382535d1072a6803ffd0166ee70b99187514b9119656e94c852519ef3c608ed6d829a86d19d4d8a20027a6fc1e1a3fe909defc4644e139002998751a40bb124c
7
+ data.tar.gz: d18803def75f4efa16e4e4339e3f0b1e64a6a54f77c4a8a6b45d5cb8991e6a3e5a87a0c4d1047c22ad8fdab4266c1c3dedac07660663aeb6557624266e6c6809
@@ -1,8 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tomosia_amanaplus_crawl (0.1.8)
5
- httparty (= 0.18.1)
4
+ tomosia_amanaplus_crawl (0.2.3)
6
5
  nokogiri (= 1.10.10)
7
6
  spreadsheet (= 1.2.6)
8
7
  thor
@@ -11,14 +10,7 @@ GEM
11
10
  remote: https://rubygems.org/
12
11
  specs:
13
12
  diff-lcs (1.4.4)
14
- httparty (0.18.1)
15
- mime-types (~> 3.0)
16
- multi_xml (>= 0.5.2)
17
- mime-types (3.3.1)
18
- mime-types-data (~> 3.2015)
19
- mime-types-data (3.2020.0512)
20
13
  mini_portile2 (2.4.0)
21
- multi_xml (0.6.0)
22
14
  nokogiri (1.10.10)
23
15
  mini_portile2 (~> 2.4.0)
24
16
  rake (12.3.3)
@@ -15,11 +15,11 @@ module TomosiaAmanaplusCrawl
15
15
 
16
16
  pages = parsed_page.css("div.c-paginate__nums").css('a').last.text.to_i # tổng số page
17
17
  images_listings = parsed_page.css("div.p-search-result__body") # danh sách các thẻ div chứa image
18
-
18
+
19
19
  # lấy tổng số image
20
20
  total = parsed_page.css("h1.p-search-result__ttl").text.split(' ').first
21
21
  total = total[(6 + keyword.length)..(total.length - 1)].chop.chop.chop.gsub(',', '').to_i
22
- if max > total # nếu max lớn hơn total thì max = total => vẫn lấy hết
22
+ if max == nil || max > total # nếu max lớn hơn total thì max = total => vẫn lấy hết
23
23
  max = total
24
24
  end
25
25
 
@@ -34,11 +34,11 @@ module TomosiaAmanaplusCrawl
34
34
  curr_index = 1
35
35
  while curr_page <= pages
36
36
  puts "Crawling page #{curr_page}..........."
37
-
37
+
38
38
  pagination_unparsed_page = open("#{URL}/#{keyword}?page=#{curr_page}").read
39
39
  pagination_parsed_page = Nokogiri::HTML(pagination_unparsed_page)
40
40
  pagination_images_listings = pagination_parsed_page.css("div.p-item-thumb")
41
-
41
+
42
42
  pagination_images_listings.each do |img|
43
43
  if curr_index > max
44
44
  return images
@@ -54,7 +54,7 @@ module TomosiaAmanaplusCrawl
54
54
  images << current_image
55
55
  curr_index += 1
56
56
  end
57
-
57
+
58
58
  curr_page += 1
59
59
  end
60
60
  images
@@ -75,7 +75,7 @@ module TomosiaAmanaplusCrawl
75
75
  File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
76
76
  file.write(image.read) # lưu hình ảnh
77
77
  curr_image[:size] = image.size # cập nhật lại size trong mảng images
78
- print "."
78
+ print '.'
79
79
  end
80
80
  end # end open
81
81
  rescue => exception
@@ -99,10 +99,9 @@ module TomosiaAmanaplusCrawl
99
99
  book = Spreadsheet::Workbook.new
100
100
  sheet1 = book.create_worksheet
101
101
 
102
- i = 0
103
102
  sheet1.row(0).concat %w{Title Url Size(bytes) Extension}
104
103
  puts "Writing..........."
105
- images.each do |img|
104
+ images.each_with_index do |img, i|
106
105
  sheet1.row(i += 1).push img[:title], img[:url], img[:size], img[:extension]
107
106
  end
108
107
  puts "Writed."
@@ -8,7 +8,7 @@ module TomosiaAmanaplusCrawl
8
8
  option :destination
9
9
  option :max
10
10
  def crawl(keyword)
11
- TomosiaAmanaplusCrawl::Crawler.new.run(keyword, options[:destination], options[:max].to_i)
11
+ TomosiaAmanaplusCrawl::Crawler.new.run(keyword, options[:destination], options[:max] == nil ? nil : options[:max].to_i)
12
12
  end
13
13
  end
14
14
  end
@@ -1,3 +1,3 @@
1
1
  module TomosiaAmanaplusCrawl
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomosia_amanaplus_crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nhat Huy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-10 00:00:00.000000000 Z
11
+ date: 2020-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri