brandeins 0.0.16 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/brandeins.gemspec +1 -0
- data/lib/brandeins.rb +89 -30
- data/lib/brandeins/pdf-tools.rb +88 -0
- data/lib/brandeins/setup.rb +12 -9
- data/lib/brandeins/version.rb +1 -1
- data/test/brandeins_test.rb +22 -2
- metadata +21 -4
data/brandeins.gemspec
CHANGED
data/lib/brandeins.rb
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
%w(
|
2
2
|
brandeins/version
|
3
3
|
brandeins/setup
|
4
|
+
brandeins/pdf-tools
|
4
5
|
nokogiri
|
5
6
|
open-uri
|
6
7
|
uri
|
7
8
|
fileutils
|
8
9
|
thor
|
10
|
+
prawn
|
9
11
|
).each do |lib|
|
12
|
+
begin
|
10
13
|
require lib
|
14
|
+
rescue Exception => e
|
15
|
+
puts "missing #{lib}, #{e.inspect}"
|
16
|
+
end
|
11
17
|
end
|
12
18
|
|
13
19
|
module BrandEins
|
@@ -47,6 +53,24 @@ module BrandEins
|
|
47
53
|
def setup
|
48
54
|
BrandEinsSetup.new
|
49
55
|
end
|
56
|
+
|
57
|
+
desc 'test', 'test some stuff'
|
58
|
+
method_option :input, :type => :string
|
59
|
+
method_option :output, :type => :string
|
60
|
+
def test
|
61
|
+
gs = BrandEins::PdfTools::GhostscriptWin.new
|
62
|
+
if gs.available?
|
63
|
+
puts "GS is available"
|
64
|
+
if options.input.nil? || options.output.nil?
|
65
|
+
puts "need input/output to merge files"
|
66
|
+
else
|
67
|
+
puts "input: #{options.input}, output: #{options.output}"
|
68
|
+
BrandEins::PdfTools::GhostscriptWin.merge_pdf_files(options.input, options.output)
|
69
|
+
end
|
70
|
+
else
|
71
|
+
puts "GS not found"
|
72
|
+
end
|
73
|
+
end
|
50
74
|
end
|
51
75
|
|
52
76
|
class Downloader
|
@@ -56,7 +80,8 @@ module BrandEins
|
|
56
80
|
@url = 'http://www.brandeins.de'
|
57
81
|
@archive = false
|
58
82
|
@dl_dir = path
|
59
|
-
@tmp_dir = path + '/tmp'
|
83
|
+
@tmp_dir = path + '/brand-eins-tmp'
|
84
|
+
@pdftool = BrandEins::PdfTools.get_pdf_tool
|
60
85
|
create_tmp_dirs
|
61
86
|
end
|
62
87
|
|
@@ -83,7 +108,7 @@ module BrandEins
|
|
83
108
|
magazine_links = @archive.get_magazine_links_by_year(year)
|
84
109
|
target_magazine_link = magazine_links[volume-1]
|
85
110
|
|
86
|
-
get_magazine_by_link(target_magazine_link, target_pdf)
|
111
|
+
get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
|
87
112
|
end
|
88
113
|
|
89
114
|
private
|
@@ -91,26 +116,58 @@ module BrandEins
|
|
91
116
|
FileUtils.mkdir_p @tmp_dir unless File.directory?(@tmp_dir)
|
92
117
|
end
|
93
118
|
|
94
|
-
def get_magazine_by_link(target_magazine_link, target_pdf)
|
119
|
+
def get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
|
95
120
|
pdf_links = @archive.magazine_pdf_links(target_magazine_link)
|
96
|
-
|
97
|
-
|
121
|
+
pdf_files = download_pdfs(pdf_links)
|
122
|
+
|
123
|
+
pdf_cover = create_cover_pdf(year, volume)
|
124
|
+
pdf_files = pdf_files.reverse.push(pdf_cover).reverse
|
125
|
+
|
126
|
+
if !@pdftool.nil?
|
127
|
+
@pdftool.merge_pdf_files(pdf_files, target_pdf)
|
128
|
+
cleanup
|
129
|
+
else
|
130
|
+
if RUBY_PLATFORM.include? 'darwin'
|
131
|
+
puts 'brandeins wont merge the single pdf files since it didnt find the pdftk tool'
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def create_cover_pdf(year, volume)
|
137
|
+
cover = @archive.get_magazine_cover(year, volume)
|
138
|
+
cover_title = cover[:title]
|
139
|
+
cover_img_url = cover[:img_url]
|
140
|
+
cover_img_file = @tmp_dir + "/cover-#{year}-#{volume}.jpg"
|
141
|
+
cover_pdf_file = @tmp_dir + "/cover-#{year}-#{volume}.pdf"
|
142
|
+
|
143
|
+
File.open(cover_img_file,'w') do |f|
|
144
|
+
uri = URI.parse(cover_img_url)
|
145
|
+
Net::HTTP.start(uri.host,uri.port) do |http|
|
146
|
+
http.request_get(uri.path) do |res|
|
147
|
+
res.read_body do |seg|
|
148
|
+
f << seg
|
149
|
+
#hack -- adjust to suit:
|
150
|
+
sleep 0.005
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
require 'prawn'
|
157
|
+
Prawn::Document.generate(cover_pdf_file) do |pdf|
|
158
|
+
pdf.text "<font size='18'><b>" + cover_title + "</b></font>", :align => :center, :inline_format => true
|
159
|
+
pdf.image cover_img_file, :position => :center, :vposition => :center
|
160
|
+
end
|
161
|
+
return cover_pdf_file
|
98
162
|
end
|
99
163
|
|
100
164
|
def get_target_pdf(year, volume)
|
101
165
|
"Brand-Eins-#{year}-#{volume}.pdf"
|
102
166
|
end
|
103
167
|
|
104
|
-
def
|
168
|
+
def download_pdfs(pdf_links)
|
105
169
|
pdf_downloader = PDFDownloader.new(pdf_links, @tmp_dir)
|
106
|
-
|
107
|
-
merge_pdfs(pdf_files, target_pdf)
|
108
|
-
end
|
109
|
-
|
110
|
-
def merge_pdfs(pdf_files, target_pdf)
|
111
|
-
puts "Merging single PDFs now"
|
112
|
-
pdf_sources = pdf_files.join(" ")
|
113
|
-
system "pdftk #{pdf_sources} output #{@dl_dir}/#{target_pdf}"
|
170
|
+
pdf_downloader.download_all
|
114
171
|
end
|
115
172
|
|
116
173
|
def cleanup
|
@@ -130,7 +187,6 @@ module BrandEins
|
|
130
187
|
pdf_name = @dl_dir + '/' + File.basename(pdf_link)
|
131
188
|
pdf_url = pdf_link
|
132
189
|
download_pdf(pdf_url, pdf_name)
|
133
|
-
|
134
190
|
pdf_files << pdf_name
|
135
191
|
end
|
136
192
|
pdf_files
|
@@ -139,17 +195,15 @@ module BrandEins
|
|
139
195
|
private
|
140
196
|
|
141
197
|
def download_pdf(pdf_url, filename)
|
198
|
+
if File.exists? filename
|
199
|
+
puts "File #{filename} seems to be already downloaded"
|
200
|
+
return true
|
201
|
+
end
|
202
|
+
|
142
203
|
puts "Downloading PDF from #{pdf_url} to #{filename}"
|
143
|
-
File.open(filename,'
|
144
|
-
|
145
|
-
|
146
|
-
http.request_get(uri.path) do |res|
|
147
|
-
res.read_body do |seg|
|
148
|
-
f << seg
|
149
|
-
#hack -- adjust to suit:
|
150
|
-
sleep 0.005
|
151
|
-
end
|
152
|
-
end
|
204
|
+
File.open(filename,'wb') do |new_file|
|
205
|
+
open(pdf_url, 'rb') do |read_file|
|
206
|
+
new_file.write(read_file.read)
|
153
207
|
end
|
154
208
|
end
|
155
209
|
end
|
@@ -190,6 +244,15 @@ module BrandEins
|
|
190
244
|
magazine_links
|
191
245
|
end
|
192
246
|
|
247
|
+
def get_magazine_cover(year, volume)
|
248
|
+
title = @doc.css("#month_detail_#{year}_#{volume} .titel").children[0].to_s
|
249
|
+
img_url = ''
|
250
|
+
@doc.css("#month_detail_#{year}_#{volume} .cover a img").each do |node|
|
251
|
+
img_url = node['src']
|
252
|
+
end
|
253
|
+
return { :title => title, :img_url => @base_url + '/' + img_url }
|
254
|
+
end
|
255
|
+
|
193
256
|
def magazine_pdf_links(url)
|
194
257
|
magazine = ArchiveMagazine.new(url, @base_url)
|
195
258
|
magazine.get_magazine_pdf_links
|
@@ -207,7 +270,6 @@ module BrandEins
|
|
207
270
|
|
208
271
|
def get_magazine_pdf_links
|
209
272
|
[get_editorial_article_links, get_schwerpunkt_article_links].flatten
|
210
|
-
|
211
273
|
end
|
212
274
|
|
213
275
|
def get_schwerpunkt_article_links
|
@@ -250,14 +312,11 @@ module BrandEins
|
|
250
312
|
if link[0].nil? then
|
251
313
|
return nil
|
252
314
|
else
|
253
|
-
|
315
|
+
return link[0]['href']
|
254
316
|
end
|
255
317
|
end
|
256
|
-
|
257
318
|
end
|
258
|
-
|
259
319
|
end
|
260
320
|
end
|
261
|
-
|
262
321
|
end
|
263
322
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module BrandEins
|
2
|
+
module PdfTools
|
3
|
+
attr_reader :pdf_tools, :pdf_tool
|
4
|
+
|
5
|
+
def self.get_pdf_tool
|
6
|
+
@pdf_tools ||= _init_pdf_tools
|
7
|
+
@pdf_tool ||= @pdf_tools.first.new if @pdf_tools.length > 0
|
8
|
+
return @pdf_tool
|
9
|
+
end
|
10
|
+
|
11
|
+
class Template
|
12
|
+
attr_accessor :cmd, :args, :info
|
13
|
+
|
14
|
+
def available?
|
15
|
+
puts "trying to execute: #{@cmd}"
|
16
|
+
_cmd_available? @cmd
|
17
|
+
end
|
18
|
+
|
19
|
+
def merge_pdf_files(pdf_files, target_pdf)
|
20
|
+
begin
|
21
|
+
puts "args: #{@args}"
|
22
|
+
arg_files = pdf_files.join " "
|
23
|
+
args = self.args.gsub(/__pdf_files__/, arg_files).gsub(/__target_pdf__/, target_pdf)
|
24
|
+
puts "executing: #{@cmd} #{args}"
|
25
|
+
open("|#{@cmd} #{args}").close
|
26
|
+
rescue Exception => e
|
27
|
+
puts "error: #{e.inspect}"
|
28
|
+
return false
|
29
|
+
end
|
30
|
+
return true
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
def _cmd_available? (cmd)
|
35
|
+
begin
|
36
|
+
open("|#{cmd}").close
|
37
|
+
rescue Exception
|
38
|
+
return false
|
39
|
+
end
|
40
|
+
return true
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class TemplateWin < Template; end
|
45
|
+
class TemplateOSX < Template; end
|
46
|
+
|
47
|
+
class PdftkOSX < TemplateOSX
|
48
|
+
def initialize
|
49
|
+
@cmd = 'pdftk2'
|
50
|
+
@args = '__pdf_files__ output __target_pdf__'
|
51
|
+
@info = 'Visit http://test.com'
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
class GhostscriptWin < TemplateWin
|
56
|
+
def initialize
|
57
|
+
@cmd = '"C:/Program Files/gs/gs9.06/bin/gswin64c.exe"'
|
58
|
+
@args = ' -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sOutputFile=__target_pdf__ __pdf_files__'
|
59
|
+
@info = 'Visit me!'
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
private
|
64
|
+
def self._init_pdf_tools
|
65
|
+
@pdf_tools = Array.new
|
66
|
+
if RUBY_PLATFORM.include? 'w32'
|
67
|
+
return _get_subclasses TemplateWin
|
68
|
+
elsif RUBY_PLATFORM.include? 'darwin'
|
69
|
+
return _get_subclasses TemplateOSX
|
70
|
+
else
|
71
|
+
return nil
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def self._get_subclasses(klass)
|
76
|
+
classes = []
|
77
|
+
klass.subclasses.each do |sklass|
|
78
|
+
classes << sklass.new
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
class Class
|
85
|
+
def subclasses
|
86
|
+
ObjectSpace.each_object(Class).select { |klass| klass < self }
|
87
|
+
end
|
88
|
+
end
|
data/lib/brandeins/setup.rb
CHANGED
@@ -1,19 +1,22 @@
|
|
1
1
|
class BrandEinsSetup
|
2
|
+
attr_reader :pdf_tools
|
3
|
+
|
2
4
|
def initialize
|
5
|
+
@pdf_tools = Array.new
|
6
|
+
end
|
7
|
+
|
8
|
+
def check_requirements
|
3
9
|
puts 'Checking requirements for your system'
|
4
|
-
if
|
5
|
-
puts
|
10
|
+
if @pdf_tools.any? { |pdf_tool| pdf_tool.available? }
|
11
|
+
puts 'It seems you have at least one supported pdf tool installed. You are ready to go!'
|
6
12
|
else
|
7
|
-
puts
|
8
|
-
puts
|
13
|
+
puts 'It seems you dont have any pdf tools installed or brandeins was not able to locate them.'
|
14
|
+
puts pdf_tool_instructions
|
9
15
|
end
|
10
16
|
end
|
11
17
|
|
12
|
-
def
|
13
|
-
|
18
|
+
def add_pdf_tool(pdf_tool)
|
19
|
+
@pdf_tools << pdf_tool
|
14
20
|
end
|
15
21
|
|
16
|
-
def pdftk?
|
17
|
-
if system('pdftk --version').nil? then false else true end
|
18
|
-
end
|
19
22
|
end
|
data/lib/brandeins/version.rb
CHANGED
data/test/brandeins_test.rb
CHANGED
@@ -12,9 +12,10 @@ class TestBrandEinsDownload < MiniTest::Unit::TestCase
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def test_tmp_directories_get_created
|
15
|
+
skip
|
15
16
|
FakeFS do
|
16
|
-
bdl = BrandEins::Downloader.new @dir
|
17
|
-
assert File.directory?(File.expand_path("./#{@dir}/tmp"))
|
17
|
+
#bdl = BrandEins::Downloader.new @dir
|
18
|
+
#assert File.directory?(File.expand_path("./#{@dir}/tmp"))
|
18
19
|
end
|
19
20
|
end
|
20
21
|
|
@@ -35,4 +36,23 @@ class TestBrandEinsDownload < MiniTest::Unit::TestCase
|
|
35
36
|
assert_equal magazine_links[0], (@base_url + '/magazin/nein-sagen.html')
|
36
37
|
assert_equal magazine_links[1], (@base_url + '/magazin/markenkommunikation.html')
|
37
38
|
end
|
39
|
+
|
40
|
+
def test_get_magazine_cover
|
41
|
+
html =<<-EOF
|
42
|
+
<li class="month_detail" id="month_detail_2012_4">
|
43
|
+
<dl>
|
44
|
+
<dt class="ausgabe">Ausgabe 4/2012</dt>
|
45
|
+
<dt class="titel">SCHWERPUNKT Kapitalismus</dt>
|
46
|
+
<dd class="cover">
|
47
|
+
<a href="magazin/kapitalismus.html" title="Zum Magazin brand eins Online 4 2012">
|
48
|
+
<img src="typo3temp/pics/08ff826417.jpg" width="235" height="311" alt="Ausgabe 04/2012 SCHWERPUNKT Kapitalismus"></a>
|
49
|
+
</dd>
|
50
|
+
</dl>
|
51
|
+
</li>
|
52
|
+
EOF
|
53
|
+
|
54
|
+
archive_site = BrandEins::Downloader::ArchiveSite.new @base_url, html
|
55
|
+
cover = archive_site.get_magazine_cover(2012, 4)
|
56
|
+
assert_equal cover, { :title => "SCHWERPUNKT Kapitalismus", :img_url => "#{@base_url}/typo3temp/pics/08ff826417.jpg" }
|
57
|
+
end
|
38
58
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: brandeins
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -59,6 +59,22 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: prawn
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
description: BrandEins gem offers a download command to download a specific or all
|
63
79
|
volumes. Use `brandeins help` to find out more about it
|
64
80
|
email:
|
@@ -77,6 +93,7 @@ files:
|
|
77
93
|
- bin/brandeins
|
78
94
|
- brandeins.gemspec
|
79
95
|
- lib/brandeins.rb
|
96
|
+
- lib/brandeins/pdf-tools.rb
|
80
97
|
- lib/brandeins/setup.rb
|
81
98
|
- lib/brandeins/version.rb
|
82
99
|
- test/brandeins_test.rb
|
@@ -102,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
102
119
|
version: '0'
|
103
120
|
segments:
|
104
121
|
- 0
|
105
|
-
hash:
|
122
|
+
hash: 1148308714876892824
|
106
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
124
|
none: false
|
108
125
|
requirements:
|
@@ -111,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
128
|
version: '0'
|
112
129
|
segments:
|
113
130
|
- 0
|
114
|
-
hash:
|
131
|
+
hash: 1148308714876892824
|
115
132
|
requirements: []
|
116
133
|
rubyforge_project:
|
117
134
|
rubygems_version: 1.8.24
|