brandeins 0.0.16 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/brandeins.gemspec +1 -0
- data/lib/brandeins.rb +89 -30
- data/lib/brandeins/pdf-tools.rb +88 -0
- data/lib/brandeins/setup.rb +12 -9
- data/lib/brandeins/version.rb +1 -1
- data/test/brandeins_test.rb +22 -2
- metadata +21 -4
data/brandeins.gemspec
CHANGED
data/lib/brandeins.rb
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
%w(
|
2
2
|
brandeins/version
|
3
3
|
brandeins/setup
|
4
|
+
brandeins/pdf-tools
|
4
5
|
nokogiri
|
5
6
|
open-uri
|
6
7
|
uri
|
7
8
|
fileutils
|
8
9
|
thor
|
10
|
+
prawn
|
9
11
|
).each do |lib|
|
12
|
+
begin
|
10
13
|
require lib
|
14
|
+
rescue Exception => e
|
15
|
+
puts "missing #{lib}, #{e.inspect}"
|
16
|
+
end
|
11
17
|
end
|
12
18
|
|
13
19
|
module BrandEins
|
@@ -47,6 +53,24 @@ module BrandEins
|
|
47
53
|
def setup
|
48
54
|
BrandEinsSetup.new
|
49
55
|
end
|
56
|
+
|
57
|
+
desc 'test', 'test some stuff'
|
58
|
+
method_option :input, :type => :string
|
59
|
+
method_option :output, :type => :string
|
60
|
+
def test
|
61
|
+
gs = BrandEins::PdfTools::GhostscriptWin.new
|
62
|
+
if gs.available?
|
63
|
+
puts "GS is available"
|
64
|
+
if options.input.nil? || options.output.nil?
|
65
|
+
puts "need input/output to merge files"
|
66
|
+
else
|
67
|
+
puts "input: #{options.input}, output: #{options.output}"
|
68
|
+
BrandEins::PdfTools::GhostscriptWin.merge_pdf_files(options.input, options.output)
|
69
|
+
end
|
70
|
+
else
|
71
|
+
puts "GS not found"
|
72
|
+
end
|
73
|
+
end
|
50
74
|
end
|
51
75
|
|
52
76
|
class Downloader
|
@@ -56,7 +80,8 @@ module BrandEins
|
|
56
80
|
@url = 'http://www.brandeins.de'
|
57
81
|
@archive = false
|
58
82
|
@dl_dir = path
|
59
|
-
@tmp_dir = path + '/tmp'
|
83
|
+
@tmp_dir = path + '/brand-eins-tmp'
|
84
|
+
@pdftool = BrandEins::PdfTools.get_pdf_tool
|
60
85
|
create_tmp_dirs
|
61
86
|
end
|
62
87
|
|
@@ -83,7 +108,7 @@ module BrandEins
|
|
83
108
|
magazine_links = @archive.get_magazine_links_by_year(year)
|
84
109
|
target_magazine_link = magazine_links[volume-1]
|
85
110
|
|
86
|
-
get_magazine_by_link(target_magazine_link, target_pdf)
|
111
|
+
get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
|
87
112
|
end
|
88
113
|
|
89
114
|
private
|
@@ -91,26 +116,58 @@ module BrandEins
|
|
91
116
|
FileUtils.mkdir_p @tmp_dir unless File.directory?(@tmp_dir)
|
92
117
|
end
|
93
118
|
|
94
|
-
def get_magazine_by_link(target_magazine_link, target_pdf)
|
119
|
+
def get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
|
95
120
|
pdf_links = @archive.magazine_pdf_links(target_magazine_link)
|
96
|
-
|
97
|
-
|
121
|
+
pdf_files = download_pdfs(pdf_links)
|
122
|
+
|
123
|
+
pdf_cover = create_cover_pdf(year, volume)
|
124
|
+
pdf_files = pdf_files.reverse.push(pdf_cover).reverse
|
125
|
+
|
126
|
+
if !@pdftool.nil?
|
127
|
+
@pdftool.merge_pdf_files(pdf_files, target_pdf)
|
128
|
+
cleanup
|
129
|
+
else
|
130
|
+
if RUBY_PLATFORM.include? 'darwin'
|
131
|
+
puts 'brandeins wont merge the single pdf files since it didnt find the pdftk tool'
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def create_cover_pdf(year, volume)
|
137
|
+
cover = @archive.get_magazine_cover(year, volume)
|
138
|
+
cover_title = cover[:title]
|
139
|
+
cover_img_url = cover[:img_url]
|
140
|
+
cover_img_file = @tmp_dir + "/cover-#{year}-#{volume}.jpg"
|
141
|
+
cover_pdf_file = @tmp_dir + "/cover-#{year}-#{volume}.pdf"
|
142
|
+
|
143
|
+
File.open(cover_img_file,'w') do |f|
|
144
|
+
uri = URI.parse(cover_img_url)
|
145
|
+
Net::HTTP.start(uri.host,uri.port) do |http|
|
146
|
+
http.request_get(uri.path) do |res|
|
147
|
+
res.read_body do |seg|
|
148
|
+
f << seg
|
149
|
+
#hack -- adjust to suit:
|
150
|
+
sleep 0.005
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
require 'prawn'
|
157
|
+
Prawn::Document.generate(cover_pdf_file) do |pdf|
|
158
|
+
pdf.text "<font size='18'><b>" + cover_title + "</b></font>", :align => :center, :inline_format => true
|
159
|
+
pdf.image cover_img_file, :position => :center, :vposition => :center
|
160
|
+
end
|
161
|
+
return cover_pdf_file
|
98
162
|
end
|
99
163
|
|
100
164
|
def get_target_pdf(year, volume)
|
101
165
|
"Brand-Eins-#{year}-#{volume}.pdf"
|
102
166
|
end
|
103
167
|
|
104
|
-
def
|
168
|
+
def download_pdfs(pdf_links)
|
105
169
|
pdf_downloader = PDFDownloader.new(pdf_links, @tmp_dir)
|
106
|
-
|
107
|
-
merge_pdfs(pdf_files, target_pdf)
|
108
|
-
end
|
109
|
-
|
110
|
-
def merge_pdfs(pdf_files, target_pdf)
|
111
|
-
puts "Merging single PDFs now"
|
112
|
-
pdf_sources = pdf_files.join(" ")
|
113
|
-
system "pdftk #{pdf_sources} output #{@dl_dir}/#{target_pdf}"
|
170
|
+
pdf_downloader.download_all
|
114
171
|
end
|
115
172
|
|
116
173
|
def cleanup
|
@@ -130,7 +187,6 @@ module BrandEins
|
|
130
187
|
pdf_name = @dl_dir + '/' + File.basename(pdf_link)
|
131
188
|
pdf_url = pdf_link
|
132
189
|
download_pdf(pdf_url, pdf_name)
|
133
|
-
|
134
190
|
pdf_files << pdf_name
|
135
191
|
end
|
136
192
|
pdf_files
|
@@ -139,17 +195,15 @@ module BrandEins
|
|
139
195
|
private
|
140
196
|
|
141
197
|
def download_pdf(pdf_url, filename)
|
198
|
+
if File.exists? filename
|
199
|
+
puts "File #{filename} seems to be already downloaded"
|
200
|
+
return true
|
201
|
+
end
|
202
|
+
|
142
203
|
puts "Downloading PDF from #{pdf_url} to #{filename}"
|
143
|
-
File.open(filename,'
|
144
|
-
|
145
|
-
|
146
|
-
http.request_get(uri.path) do |res|
|
147
|
-
res.read_body do |seg|
|
148
|
-
f << seg
|
149
|
-
#hack -- adjust to suit:
|
150
|
-
sleep 0.005
|
151
|
-
end
|
152
|
-
end
|
204
|
+
File.open(filename,'wb') do |new_file|
|
205
|
+
open(pdf_url, 'rb') do |read_file|
|
206
|
+
new_file.write(read_file.read)
|
153
207
|
end
|
154
208
|
end
|
155
209
|
end
|
@@ -190,6 +244,15 @@ module BrandEins
|
|
190
244
|
magazine_links
|
191
245
|
end
|
192
246
|
|
247
|
+
def get_magazine_cover(year, volume)
|
248
|
+
title = @doc.css("#month_detail_#{year}_#{volume} .titel").children[0].to_s
|
249
|
+
img_url = ''
|
250
|
+
@doc.css("#month_detail_#{year}_#{volume} .cover a img").each do |node|
|
251
|
+
img_url = node['src']
|
252
|
+
end
|
253
|
+
return { :title => title, :img_url => @base_url + '/' + img_url }
|
254
|
+
end
|
255
|
+
|
193
256
|
def magazine_pdf_links(url)
|
194
257
|
magazine = ArchiveMagazine.new(url, @base_url)
|
195
258
|
magazine.get_magazine_pdf_links
|
@@ -207,7 +270,6 @@ module BrandEins
|
|
207
270
|
|
208
271
|
def get_magazine_pdf_links
|
209
272
|
[get_editorial_article_links, get_schwerpunkt_article_links].flatten
|
210
|
-
|
211
273
|
end
|
212
274
|
|
213
275
|
def get_schwerpunkt_article_links
|
@@ -250,14 +312,11 @@ module BrandEins
|
|
250
312
|
if link[0].nil? then
|
251
313
|
return nil
|
252
314
|
else
|
253
|
-
|
315
|
+
return link[0]['href']
|
254
316
|
end
|
255
317
|
end
|
256
|
-
|
257
318
|
end
|
258
|
-
|
259
319
|
end
|
260
320
|
end
|
261
|
-
|
262
321
|
end
|
263
322
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module BrandEins
|
2
|
+
module PdfTools
|
3
|
+
attr_reader :pdf_tools, :pdf_tool
|
4
|
+
|
5
|
+
def self.get_pdf_tool
|
6
|
+
@pdf_tools ||= _init_pdf_tools
|
7
|
+
@pdf_tool ||= @pdf_tools.first.new if @pdf_tools.length > 0
|
8
|
+
return @pdf_tool
|
9
|
+
end
|
10
|
+
|
11
|
+
class Template
|
12
|
+
attr_accessor :cmd, :args, :info
|
13
|
+
|
14
|
+
def available?
|
15
|
+
puts "trying to execute: #{@cmd}"
|
16
|
+
_cmd_available? @cmd
|
17
|
+
end
|
18
|
+
|
19
|
+
def merge_pdf_files(pdf_files, target_pdf)
|
20
|
+
begin
|
21
|
+
puts "args: #{@args}"
|
22
|
+
arg_files = pdf_files.join " "
|
23
|
+
args = self.args.gsub(/__pdf_files__/, arg_files).gsub(/__target_pdf__/, target_pdf)
|
24
|
+
puts "executing: #{@cmd} #{args}"
|
25
|
+
open("|#{@cmd} #{args}").close
|
26
|
+
rescue Exception => e
|
27
|
+
puts "error: #{e.inspect}"
|
28
|
+
return false
|
29
|
+
end
|
30
|
+
return true
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
def _cmd_available? (cmd)
|
35
|
+
begin
|
36
|
+
open("|#{cmd}").close
|
37
|
+
rescue Exception
|
38
|
+
return false
|
39
|
+
end
|
40
|
+
return true
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class TemplateWin < Template; end
|
45
|
+
class TemplateOSX < Template; end
|
46
|
+
|
47
|
+
class PdftkOSX < TemplateOSX
|
48
|
+
def initialize
|
49
|
+
@cmd = 'pdftk2'
|
50
|
+
@args = '__pdf_files__ output __target_pdf__'
|
51
|
+
@info = 'Visit http://test.com'
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
class GhostscriptWin < TemplateWin
|
56
|
+
def initialize
|
57
|
+
@cmd = '"C:/Program Files/gs/gs9.06/bin/gswin64c.exe"'
|
58
|
+
@args = ' -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sOutputFile=__target_pdf__ __pdf_files__'
|
59
|
+
@info = 'Visit me!'
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
private
|
64
|
+
def self._init_pdf_tools
|
65
|
+
@pdf_tools = Array.new
|
66
|
+
if RUBY_PLATFORM.include? 'w32'
|
67
|
+
return _get_subclasses TemplateWin
|
68
|
+
elsif RUBY_PLATFORM.include? 'darwin'
|
69
|
+
return _get_subclasses TemplateOSX
|
70
|
+
else
|
71
|
+
return nil
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def self._get_subclasses(klass)
|
76
|
+
classes = []
|
77
|
+
klass.subclasses.each do |sklass|
|
78
|
+
classes << sklass.new
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
class Class
|
85
|
+
def subclasses
|
86
|
+
ObjectSpace.each_object(Class).select { |klass| klass < self }
|
87
|
+
end
|
88
|
+
end
|
data/lib/brandeins/setup.rb
CHANGED
@@ -1,19 +1,22 @@
|
|
1
1
|
class BrandEinsSetup
|
2
|
+
attr_reader :pdf_tools
|
3
|
+
|
2
4
|
def initialize
|
5
|
+
@pdf_tools = Array.new
|
6
|
+
end
|
7
|
+
|
8
|
+
def check_requirements
|
3
9
|
puts 'Checking requirements for your system'
|
4
|
-
if
|
5
|
-
puts
|
10
|
+
if @pdf_tools.any? { |pdf_tool| pdf_tool.available? }
|
11
|
+
puts 'It seems you have at least one supported pdf tool installed. You are ready to go!'
|
6
12
|
else
|
7
|
-
puts
|
8
|
-
puts
|
13
|
+
puts 'It seems you dont have any pdf tools installed or brandeins was not able to locate them.'
|
14
|
+
puts pdf_tool_instructions
|
9
15
|
end
|
10
16
|
end
|
11
17
|
|
12
|
-
def
|
13
|
-
|
18
|
+
def add_pdf_tool(pdf_tool)
|
19
|
+
@pdf_tools << pdf_tool
|
14
20
|
end
|
15
21
|
|
16
|
-
def pdftk?
|
17
|
-
if system('pdftk --version').nil? then false else true end
|
18
|
-
end
|
19
22
|
end
|
data/lib/brandeins/version.rb
CHANGED
data/test/brandeins_test.rb
CHANGED
@@ -12,9 +12,10 @@ class TestBrandEinsDownload < MiniTest::Unit::TestCase
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def test_tmp_directories_get_created
|
15
|
+
skip
|
15
16
|
FakeFS do
|
16
|
-
bdl = BrandEins::Downloader.new @dir
|
17
|
-
assert File.directory?(File.expand_path("./#{@dir}/tmp"))
|
17
|
+
#bdl = BrandEins::Downloader.new @dir
|
18
|
+
#assert File.directory?(File.expand_path("./#{@dir}/tmp"))
|
18
19
|
end
|
19
20
|
end
|
20
21
|
|
@@ -35,4 +36,23 @@ class TestBrandEinsDownload < MiniTest::Unit::TestCase
|
|
35
36
|
assert_equal magazine_links[0], (@base_url + '/magazin/nein-sagen.html')
|
36
37
|
assert_equal magazine_links[1], (@base_url + '/magazin/markenkommunikation.html')
|
37
38
|
end
|
39
|
+
|
40
|
+
def test_get_magazine_cover
|
41
|
+
html =<<-EOF
|
42
|
+
<li class="month_detail" id="month_detail_2012_4">
|
43
|
+
<dl>
|
44
|
+
<dt class="ausgabe">Ausgabe 4/2012</dt>
|
45
|
+
<dt class="titel">SCHWERPUNKT Kapitalismus</dt>
|
46
|
+
<dd class="cover">
|
47
|
+
<a href="magazin/kapitalismus.html" title="Zum Magazin brand eins Online 4 2012">
|
48
|
+
<img src="typo3temp/pics/08ff826417.jpg" width="235" height="311" alt="Ausgabe 04/2012 SCHWERPUNKT Kapitalismus"></a>
|
49
|
+
</dd>
|
50
|
+
</dl>
|
51
|
+
</li>
|
52
|
+
EOF
|
53
|
+
|
54
|
+
archive_site = BrandEins::Downloader::ArchiveSite.new @base_url, html
|
55
|
+
cover = archive_site.get_magazine_cover(2012, 4)
|
56
|
+
assert_equal cover, { :title => "SCHWERPUNKT Kapitalismus", :img_url => "#{@base_url}/typo3temp/pics/08ff826417.jpg" }
|
57
|
+
end
|
38
58
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: brandeins
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -59,6 +59,22 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: prawn
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
description: BrandEins gem offers a download command to download a specific or all
|
63
79
|
volumes. Use `brandeins help` to find out more about it
|
64
80
|
email:
|
@@ -77,6 +93,7 @@ files:
|
|
77
93
|
- bin/brandeins
|
78
94
|
- brandeins.gemspec
|
79
95
|
- lib/brandeins.rb
|
96
|
+
- lib/brandeins/pdf-tools.rb
|
80
97
|
- lib/brandeins/setup.rb
|
81
98
|
- lib/brandeins/version.rb
|
82
99
|
- test/brandeins_test.rb
|
@@ -102,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
102
119
|
version: '0'
|
103
120
|
segments:
|
104
121
|
- 0
|
105
|
-
hash:
|
122
|
+
hash: 1148308714876892824
|
106
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
124
|
none: false
|
108
125
|
requirements:
|
@@ -111,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
128
|
version: '0'
|
112
129
|
segments:
|
113
130
|
- 0
|
114
|
-
hash:
|
131
|
+
hash: 1148308714876892824
|
115
132
|
requirements: []
|
116
133
|
rubyforge_project:
|
117
134
|
rubygems_version: 1.8.24
|