zeitungen 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/lib/zeitungen.rb +1 -0
- data/lib/zeitungen/downloader.rb +32 -9
- data/lib/zeitungen/version.rb +1 -1
- data/zeitungen.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f0b320e2eb2f2e765162d7e7ea1785ed34dd360
|
4
|
+
data.tar.gz: a596e27b09d564b4ec625057cc36734fd086f74d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fae374fd14e3b047743719995a2058babeb5b7e7dbe4b2c4fdf373274040c56c8e5a1ff701762cbf6477413859f343059a3b971b87fc764770cae7bd8fc016b9
|
7
|
+
data.tar.gz: 15d3c52e0f960b11812d84869077804dac613bc40622ad5d2121f465310ee8c297c7d8e84ecb061b5e86a95c8b0aff5f1658703309da3986cee85d8b9f2c88f5
|
data/.gitignore
CHANGED
data/lib/zeitungen.rb
CHANGED
data/lib/zeitungen/downloader.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
module Zeitungen
|
2
2
|
class Downloader
|
3
|
-
def initialize(url, zeitungen, client, passwords)
|
3
|
+
def initialize(url, zeitungen, client, passwords, verbose: false)
|
4
4
|
@url = url
|
5
|
-
@zeitungen
|
5
|
+
@zeitungen = zeitungen
|
6
6
|
@client = client
|
7
7
|
@passwords = passwords
|
8
|
+
@verbose = verbose
|
8
9
|
end
|
9
10
|
|
10
11
|
def run(date=Date.today, options={})
|
@@ -14,6 +15,7 @@ module Zeitungen
|
|
14
15
|
|
15
16
|
page = IndexPage.new(@url, @passwords)
|
16
17
|
zeitungen_links = page.links(date)
|
18
|
+
puts "zeitungen_links.size: #{zeitungen_links.size}" if @verbose
|
17
19
|
# puts zeitungen_links.inspect
|
18
20
|
queue = enqueue(zeitungen_links)
|
19
21
|
|
@@ -37,9 +39,12 @@ module Zeitungen
|
|
37
39
|
@zeitungen.each do |z|
|
38
40
|
if link = zeitungen_links.find{|l| z.regexp.match l.text } # se c'è un link per il zeitungen corrente
|
39
41
|
uri = link.uri
|
42
|
+
puts "uri: #{uri}" if @verbose
|
40
43
|
if uri.host=="t.umblr.com"
|
41
44
|
h = Hash[uri.query.split("&").map{|e| e.split("=")}]
|
42
|
-
|
45
|
+
u = URI(URI.unescape(h["z"])+"\?directDownload\=true")
|
46
|
+
puts "u: #{u}" if @verbose
|
47
|
+
z.uri = u
|
43
48
|
else
|
44
49
|
z.uri = uri+"\?directDownload\=true" # zeitungen.uri+"\?directDownload\=true"
|
45
50
|
end
|
@@ -56,14 +61,32 @@ module Zeitungen
|
|
56
61
|
Thread.new do
|
57
62
|
while !queue.empty? && z = queue.pop
|
58
63
|
begin
|
59
|
-
|
64
|
+
url = z.uri
|
60
65
|
file = Tempfile.new('zeitungen')
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
+
|
67
|
+
|
68
|
+
res = HTTP.get(url)
|
69
|
+
i = 0
|
70
|
+
puts "Response status: #{res.status.to_s}" if @verbose
|
71
|
+
while res.status.to_s=="302 Found" and i<5 # max 5 redirect
|
72
|
+
url = res.headers.get("Location")
|
73
|
+
url = url.first if url.is_a? Array
|
74
|
+
puts "Redirect URL: #{url}" if @verbose
|
75
|
+
res = HTTP.get(url)
|
76
|
+
puts "Response status: #{res.status.to_s}" if @verbose
|
77
|
+
i += 1
|
66
78
|
end
|
79
|
+
puts "downloading #{z.final_name} (#{url})..."
|
80
|
+
file.write(res.to_s)
|
81
|
+
|
82
|
+
|
83
|
+
# res = HTTP.get()
|
84
|
+
# Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme=='https') do |http|
|
85
|
+
# request = Net::HTTP::Get.new(uri)
|
86
|
+
# puts "downloading #{z.final_name} (#{z.uri})..."
|
87
|
+
# response = http.request request
|
88
|
+
# file.write(response.body)
|
89
|
+
# end
|
67
90
|
|
68
91
|
filename = filename_w_date(z.final_name)
|
69
92
|
z.upload ? @client.mv_file_in_public_dest(filename, file) : @client.mv_file_in_private_dest(filename, file)
|
data/lib/zeitungen/version.rb
CHANGED
data/zeitungen.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zeitungen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Iwan Buetti
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -178,6 +178,20 @@ dependencies:
|
|
178
178
|
- - ">="
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: http
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
181
195
|
description:
|
182
196
|
email:
|
183
197
|
- iwan.buetti@gmail.com
|