aws-whitepaper-downloader 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1945c635d5d084efc5fa70451a474a9d4486b4de
|
4
|
+
data.tar.gz: f8dcd349cc8c967fd2f375103a463b4cea15ba96
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 444f896f2c746ba7e44359c93bc79b1a043f66eac57c7052a6c590261c6cae0f9ce2ae6dcf56d4fc2e5cb2b21b6834e1ca6cc5aafdf0124c23c438f98e4fe15f
|
7
|
+
data.tar.gz: d069f3a3f9e522d2aa9db8d5e1de8bbcc5cff3f2bc1b569bedfa66162dd6818c631d87a3893e6cc7b3340984e909858a08430030afd9ae0f85a942a20af6494d
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/aws_whitepaper_downloader.rb'
|
4
|
+
require "aws_whitepaper_downloader/downloader"
|
5
|
+
require 'commander/import'
|
6
|
+
require "fileutils"
|
7
|
+
|
8
|
+
# Commander program metadata shown by `--help` / `--version`.
program :name, 'Aws White Paper Downloader'
# Was '1.0.0', which disagreed with the published gem version (0.1.0).
# NOTE(review): version.rb is not visible here; if it defines a VERSION
# constant, prefer referencing it so the two cannot drift again.
program :version, '0.1.0'
program :description, 'download all whitepaper sorted by category'

# `all`: runs AwsWhitepaperDownloader.run against the directory the command
# was invoked from, so the papers end up below the caller's working directory.
command :all do |c|
  c.syntax = 'aws-whitepaper-downloader all'
  c.description = 'Download every whitepaper into "white pappers" under the current directory'
  # The action takes no arguments or options; underscore-prefix the unused params.
  c.action do |_args, _options|
    AwsWhitepaperDownloader.run(Dir.pwd)
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require "aws_whitepaper_downloader/version"
|
2
|
+
require "aws_whitepaper_downloader/crawler"
|
3
|
+
require "aws_whitepaper_downloader/downloader"
|
4
|
+
require 'pry'
|
5
|
+
|
6
|
+
# Namespace for the gem; exposes the single entry point that chains the
# crawler and the downloader.
module AwsWhitepaperDownloader
  class << self
    # Crawls the whitepaper links, then hands them to the downloader.
    #
    # dir_path - directory passed through to Downloader#run (nil by default).
    #
    # Returns whatever Downloader#run returns.
    def run(dir_path = nil)
      crawled_links = Crawler.new.run
      downloader = Downloader.new
      downloader.run(crawled_links, dir_path)
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module AwsWhitepaperDownloader
  # Scrapes the AWS whitepaper index page and turns it into a hash of
  # category title => entries, where entries map PDF names to URLs (type
  # 'normal') or subtitle => { PDF name => URL } (type 'nested').
  class Crawler
    # Page that lists every whitepaper.
    WHITEPAPERS_URL = 'https://aws.amazon.com/whitepapers/'.freeze

    # Top-level blocks of the page: category titles and the rows below them.
    SECTION_SELECTOR = '.content > .title-wrapper, .content > .row-builder'.freeze

    # Inside a nested row: subtitle wrappers and the columns holding the lists.
    NESTED_SELECTOR = '.wrapper > .columns > .parsys > .columnbuilder, ' \
                      '.wrapper > .columns > .parsys > .title-wrapper'.freeze

    def initialize
    end

    # Fetches and parses the index page.
    #
    # Returns a Hash keyed by category title. Each value carries a 'type' key
    # ('normal' or 'nested') next to its PDF entries.
    def run
      output = {}
      current_hash = nil
      pdfs = nil

      # URI#open (from open-uri) replaces Kernel#open on a URL string, which
      # no longer works on Ruby 3.0+; the block form closes the stream.
      doc = URI.parse(WHITEPAPERS_URL).open { |io| Nokogiri::HTML(io) }

      doc.css(SECTION_SELECTOR).each do |div|
        css_class = div['class'].to_s

        # A title wrapper starts a new category.
        if css_class.include?('title-wrapper')
          title = parse_title(div)
          current_hash = (output[title] ||= {})
          # Forget the previous category's bucket so lists found before the
          # first subtitle can never be filed under the wrong category.
          pdfs = nil
        end

        # Rows that appear before any title have nowhere to go.
        next unless current_hash
        next unless css_class.include?('row-builder')

        if div.css('h3').empty?
          # Normal row: PDFs hang directly off the category.
          current_hash['type'] = 'normal'
          pdfs = parse_pdfs(div, current_hash)
        else
          # Nested row: <h3> subtitles split the category into sub-buckets.
          current_hash['type'] = 'nested'
          div.css(NESTED_SELECTOR).each do |sub_div|
            subtitle = parse_subtitle(sub_div)
            pdfs = (current_hash[subtitle] ||= {}) if subtitle
            # parse_pdfs tolerates a nil bucket (list seen before any subtitle).
            pdfs = parse_pdfs(sub_div, pdfs)
          end
        end
      end

      output
    end

    private

    # Returns the stripped text of the <h1> inside a title wrapper.
    def parse_title(title_wrapper)
      title_wrapper.css('h1').text.strip
    end

    # Returns the stripped <h3> text of the node, or nil when there is none.
    def parse_subtitle(row_builder)
      h3_title = row_builder.css('h3').text.strip
      h3_title.empty? ? nil : h3_title
    end

    # Adds every "<li><b>name</b> ... <a href=...>" entry below +html+ to +pdfs+.
    #
    # html - node responding to #css.
    # pdfs - Hash to fill (name => URL), or nil when there is no bucket yet.
    #
    # Items without a bold name or without a link are skipped; previously an
    # item with a name but no <a> raised NoMethodError on nil.
    #
    # Returns +pdfs+ (nil stays nil).
    def parse_pdfs(html, pdfs)
      return pdfs if pdfs.nil?

      html.css('li').each do |item|
        name = item.css('b').text
        anchor = item.css('a').first
        next if name.empty? || anchor.nil?

        href = anchor['href'].to_s.strip
        next if href.empty?

        pdfs[name] = absolute_url(href)
      end
      pdfs
    end

    # Prefixes "https:" as the original code did (presumably the page uses
    # protocol-relative "//host/..." hrefs), but leaves an href that already
    # carries an http(s) scheme untouched instead of producing "https:https://".
    def absolute_url(href)
      href =~ %r{\Ahttps?://}i ? href : "https:#{href}"
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module AwsWhitepaperDownloader
  # Mirrors the link hash built by the crawler onto disk: one directory per
  # category, one sub-directory per subtitle, and one "<name>.pdf" per link,
  # fetched with the external `wget` program.
  class Downloader

    # Root directory used when #run is given no download path.
    WHITE_PAPPER_ROOT_DIR = "#{__dir__}/white pappers"

    def initialize
    end

    # Downloads every PDF referenced by +hash+.
    #
    # hash          - { category => entries }, where entries map a PDF name to
    #                 its URL or a subtitle to a further { name => URL } hash.
    #                 The bookkeeping key 'type' is ignored.
    # download_path - directory that receives the "white pappers" folder, or
    #                 nil to fall back to WHITE_PAPPER_ROOT_DIR.
    #
    # Returns +hash+.
    def run( hash, download_path )
      root = download_path ? "#{ download_path }/white pappers" : WHITE_PAPPER_ROOT_DIR
      check_dir_created( nil, root )

      hash.each_pair do |category, entries|
        category_dir = check_dir_created( category, root )
        announce( 'current_dir', category_dir )
        # The old 'normal' branch walked the whole top-level hash here, so
        # every category re-downloaded all the others; only this category's
        # own entries are walked now.
        download_entries( entries, category_dir )
      end
    end

    private

    # Walks one level of the link tree: Hash values become sub-directories,
    # everything else is treated as the URL of a PDF saved into +dir+.
    def download_entries( entries, dir )
      entries.each_pair do |name, target|
        next if name == 'type'

        if target.is_a?( Hash )
          sub_dir = check_dir_created( name, dir )
          announce( 'nested_dir', sub_dir )
          download_entries( target, sub_dir )
        else
          download( name, target, dir )
        end
      end
    end

    # Fetches one PDF unless it is already on disk.
    #
    # file_name    - display name of the paper; becomes "<file_name>.pdf".
    # path         - URL of the PDF.
    # download_dir - directory the file is written to.
    #
    # wget is started with an argument list, so no shell ever sees the scraped
    # name or URL, and `-O` makes the saved file match the existence check
    # above it (the old command saved under the URL's basename instead).
    def download( file_name, path, download_dir )
      target = File.join( download_dir, "#{ safe_name( file_name ) }.pdf" )
      return if File.exist?( target )
      return if system( 'wget', '-O', target, path.to_s )

      warn "failed to download #{ path }"
      # Remove a partial file so the next run retries instead of skipping it.
      File.delete( target ) if File.exist?( target )
    end

    # Ensures the directory +name+ exists below +path+ (or +path+ itself when
    # +name+ is nil) and returns its location. Only one level is created.
    def check_dir_created( name, path )
      location = name.nil? ? path : File.join( path, safe_name( name ) )
      Dir.mkdir( location ) unless Dir.exist?( location )
      location
    end

    # Makes a scraped title usable as a single path component.
    def safe_name( name )
      name.to_s.strip.gsub( %r{[/\\]}, '-' )
    end

    # Prints the progress banner for a directory being processed.
    def announce( label, dir )
      puts "---------------- #{ label } -------------------"
      puts dir
      puts "-------------------------------------------------"
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: aws-whitepaper-downloader
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- StevenTTuD
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-05-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.14'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.14'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: commander
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Downloading all aws whitepapers by 1 command
|
70
|
+
email:
|
71
|
+
- StevenTTuD@gmail.com
|
72
|
+
executables:
|
73
|
+
- aws-whitepaper-downloader
|
74
|
+
extensions: []
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- bin/aws-whitepaper-downloader
|
78
|
+
- lib/aws_whitepaper_downloader.rb
|
79
|
+
- lib/aws_whitepaper_downloader/crawler.rb
|
80
|
+
- lib/aws_whitepaper_downloader/downloader.rb
|
81
|
+
- lib/aws_whitepaper_downloader/version.rb
|
82
|
+
homepage: https://github.com/StevenTTuD/aws-whitepaper-downloader
|
83
|
+
licenses: []
|
84
|
+
metadata:
|
85
|
+
allowed_push_host: https://rubygems.org
|
86
|
+
post_install_message:
|
87
|
+
rdoc_options: []
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
requirements: []
|
101
|
+
rubyforge_project:
|
102
|
+
rubygems_version: 2.6.12
|
103
|
+
signing_key:
|
104
|
+
specification_version: 4
|
105
|
+
summary: Downloading all aws whitepapers by 1 command
|
106
|
+
test_files: []
|
107
|
+
has_rdoc:
|