brandeins 0.0.16 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,6 +14,7 @@ Gem::Specification.new do |s|
14
14
  s.add_dependency "rake"
15
15
  s.add_dependency "thor"
16
16
  s.add_dependency "nokogiri"
17
+ s.add_dependency "prawn"
17
18
 
18
19
  s.files = `git ls-files`.split("\n")
19
20
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -1,13 +1,19 @@
1
1
  %w(
2
2
  brandeins/version
3
3
  brandeins/setup
4
+ brandeins/pdf-tools
4
5
  nokogiri
5
6
  open-uri
6
7
  uri
7
8
  fileutils
8
9
  thor
10
+ prawn
9
11
  ).each do |lib|
12
+ begin
10
13
  require lib
14
+ rescue Exception => e
15
+ puts "missing #{lib}, #{e.inspect}"
16
+ end
11
17
  end
12
18
 
13
19
  module BrandEins
@@ -47,6 +53,24 @@ module BrandEins
47
53
  def setup
48
54
  BrandEinsSetup.new
49
55
  end
56
+
57
+ desc 'test', 'test some stuff'
58
+ method_option :input, :type => :string
59
+ method_option :output, :type => :string
60
+ def test
61
+ gs = BrandEins::PdfTools::GhostscriptWin.new
62
+ if gs.available?
63
+ puts "GS is available"
64
+ if options.input.nil? || options.output.nil?
65
+ puts "need input/output to merge files"
66
+ else
67
+ puts "input: #{options.input}, output: #{options.output}"
68
+ BrandEins::PdfTools::GhostscriptWin.merge_pdf_files(options.input, options.output)
69
+ end
70
+ else
71
+ puts "GS not found"
72
+ end
73
+ end
50
74
  end
51
75
 
52
76
  class Downloader
@@ -56,7 +80,8 @@ module BrandEins
56
80
  @url = 'http://www.brandeins.de'
57
81
  @archive = false
58
82
  @dl_dir = path
59
- @tmp_dir = path + '/tmp'
83
+ @tmp_dir = path + '/brand-eins-tmp'
84
+ @pdftool = BrandEins::PdfTools.get_pdf_tool
60
85
  create_tmp_dirs
61
86
  end
62
87
 
@@ -83,7 +108,7 @@ module BrandEins
83
108
  magazine_links = @archive.get_magazine_links_by_year(year)
84
109
  target_magazine_link = magazine_links[volume-1]
85
110
 
86
- get_magazine_by_link(target_magazine_link, target_pdf)
111
+ get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
87
112
  end
88
113
 
89
114
  private
@@ -91,26 +116,58 @@ module BrandEins
91
116
  FileUtils.mkdir_p @tmp_dir unless File.directory?(@tmp_dir)
92
117
  end
93
118
 
94
- def get_magazine_by_link(target_magazine_link, target_pdf)
119
+ def get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
95
120
  pdf_links = @archive.magazine_pdf_links(target_magazine_link)
96
- process_pdf_links(pdf_links, target_pdf)
97
- cleanup
121
+ pdf_files = download_pdfs(pdf_links)
122
+
123
+ pdf_cover = create_cover_pdf(year, volume)
124
+ pdf_files = pdf_files.reverse.push(pdf_cover).reverse
125
+
126
+ if !@pdftool.nil?
127
+ @pdftool.merge_pdf_files(pdf_files, target_pdf)
128
+ cleanup
129
+ else
130
+ if RUBY_PLATFORM.include? 'darwin'
131
+ puts 'brandeins wont merge the single pdf files since it didnt find the pdftk tool'
132
+ end
133
+ end
134
+ end
135
+
136
+ def create_cover_pdf(year, volume)
137
+ cover = @archive.get_magazine_cover(year, volume)
138
+ cover_title = cover[:title]
139
+ cover_img_url = cover[:img_url]
140
+ cover_img_file = @tmp_dir + "/cover-#{year}-#{volume}.jpg"
141
+ cover_pdf_file = @tmp_dir + "/cover-#{year}-#{volume}.pdf"
142
+
143
+ File.open(cover_img_file,'w') do |f|
144
+ uri = URI.parse(cover_img_url)
145
+ Net::HTTP.start(uri.host,uri.port) do |http|
146
+ http.request_get(uri.path) do |res|
147
+ res.read_body do |seg|
148
+ f << seg
149
+ #hack -- adjust to suit:
150
+ sleep 0.005
151
+ end
152
+ end
153
+ end
154
+ end
155
+
156
+ require 'prawn'
157
+ Prawn::Document.generate(cover_pdf_file) do |pdf|
158
+ pdf.text "<font size='18'><b>" + cover_title + "</b></font>", :align => :center, :inline_format => true
159
+ pdf.image cover_img_file, :position => :center, :vposition => :center
160
+ end
161
+ return cover_pdf_file
98
162
  end
99
163
 
100
164
  def get_target_pdf(year, volume)
101
165
  "Brand-Eins-#{year}-#{volume}.pdf"
102
166
  end
103
167
 
104
- def process_pdf_links(pdf_links, target_pdf)
168
+ def download_pdfs(pdf_links)
105
169
  pdf_downloader = PDFDownloader.new(pdf_links, @tmp_dir)
106
- pdf_files = pdf_downloader.download_all
107
- merge_pdfs(pdf_files, target_pdf)
108
- end
109
-
110
- def merge_pdfs(pdf_files, target_pdf)
111
- puts "Merging single PDFs now"
112
- pdf_sources = pdf_files.join(" ")
113
- system "pdftk #{pdf_sources} output #{@dl_dir}/#{target_pdf}"
170
+ pdf_downloader.download_all
114
171
  end
115
172
 
116
173
  def cleanup
@@ -130,7 +187,6 @@ module BrandEins
130
187
  pdf_name = @dl_dir + '/' + File.basename(pdf_link)
131
188
  pdf_url = pdf_link
132
189
  download_pdf(pdf_url, pdf_name)
133
-
134
190
  pdf_files << pdf_name
135
191
  end
136
192
  pdf_files
@@ -139,17 +195,15 @@ module BrandEins
139
195
  private
140
196
 
141
197
  def download_pdf(pdf_url, filename)
198
+ if File.exists? filename
199
+ puts "File #{filename} seems to be already downloaded"
200
+ return true
201
+ end
202
+
142
203
  puts "Downloading PDF from #{pdf_url} to #{filename}"
143
- File.open(filename,'w') do |f|
144
- uri = URI.parse(pdf_url)
145
- Net::HTTP.start(uri.host,uri.port) do |http|
146
- http.request_get(uri.path) do |res|
147
- res.read_body do |seg|
148
- f << seg
149
- #hack -- adjust to suit:
150
- sleep 0.005
151
- end
152
- end
204
+ File.open(filename,'wb') do |new_file|
205
+ open(pdf_url, 'rb') do |read_file|
206
+ new_file.write(read_file.read)
153
207
  end
154
208
  end
155
209
  end
@@ -190,6 +244,15 @@ module BrandEins
190
244
  magazine_links
191
245
  end
192
246
 
247
+ def get_magazine_cover(year, volume)
248
+ title = @doc.css("#month_detail_#{year}_#{volume} .titel").children[0].to_s
249
+ img_url = ''
250
+ @doc.css("#month_detail_#{year}_#{volume} .cover a img").each do |node|
251
+ img_url = node['src']
252
+ end
253
+ return { :title => title, :img_url => @base_url + '/' + img_url }
254
+ end
255
+
193
256
  def magazine_pdf_links(url)
194
257
  magazine = ArchiveMagazine.new(url, @base_url)
195
258
  magazine.get_magazine_pdf_links
@@ -207,7 +270,6 @@ module BrandEins
207
270
 
208
271
  def get_magazine_pdf_links
209
272
  [get_editorial_article_links, get_schwerpunkt_article_links].flatten
210
-
211
273
  end
212
274
 
213
275
  def get_schwerpunkt_article_links
@@ -250,14 +312,11 @@ module BrandEins
250
312
  if link[0].nil? then
251
313
  return nil
252
314
  else
253
- href = link[0]['href']
315
+ return link[0]['href']
254
316
  end
255
317
  end
256
-
257
318
  end
258
-
259
319
  end
260
320
  end
261
-
262
321
  end
263
322
  end
@@ -0,0 +1,88 @@
1
+ module BrandEins
2
+ module PdfTools
3
+ attr_reader :pdf_tools, :pdf_tool
4
+
5
+ def self.get_pdf_tool
6
+ @pdf_tools ||= _init_pdf_tools
7
+ @pdf_tool ||= @pdf_tools.first.new if @pdf_tools.length > 0
8
+ return @pdf_tool
9
+ end
10
+
11
+ class Template
12
+ attr_accessor :cmd, :args, :info
13
+
14
+ def available?
15
+ puts "trying to execute: #{@cmd}"
16
+ _cmd_available? @cmd
17
+ end
18
+
19
+ def merge_pdf_files(pdf_files, target_pdf)
20
+ begin
21
+ puts "args: #{@args}"
22
+ arg_files = pdf_files.join " "
23
+ args = self.args.gsub(/__pdf_files__/, arg_files).gsub(/__target_pdf__/, target_pdf)
24
+ puts "executing: #{@cmd} #{args}"
25
+ open("|#{@cmd} #{args}").close
26
+ rescue Exception => e
27
+ puts "error: #{e.inspect}"
28
+ return false
29
+ end
30
+ return true
31
+ end
32
+
33
+ private
34
+ def _cmd_available? (cmd)
35
+ begin
36
+ open("|#{cmd}").close
37
+ rescue Exception
38
+ return false
39
+ end
40
+ return true
41
+ end
42
+ end
43
+
44
+ class TemplateWin < Template; end
45
+ class TemplateOSX < Template; end
46
+
47
+ class PdftkOSX < TemplateOSX
48
+ def initialize
49
+ @cmd = 'pdftk2'
50
+ @args = '__pdf_files__ output __target_pdf__'
51
+ @info = 'Visit http://test.com'
52
+ end
53
+ end
54
+
55
+ class GhostscriptWin < TemplateWin
56
+ def initialize
57
+ @cmd = '"C:/Program Files/gs/gs9.06/bin/gswin64c.exe"'
58
+ @args = ' -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sOutputFile=__target_pdf__ __pdf_files__'
59
+ @info = 'Visit me!'
60
+ end
61
+ end
62
+
63
+ private
64
+ def self._init_pdf_tools
65
+ @pdf_tools = Array.new
66
+ if RUBY_PLATFORM.include? 'w32'
67
+ return _get_subclasses TemplateWin
68
+ elsif RUBY_PLATFORM.include? 'darwin'
69
+ return _get_subclasses TemplateOSX
70
+ else
71
+ return nil
72
+ end
73
+ end
74
+
75
+ def self._get_subclasses(klass)
76
+ classes = []
77
+ klass.subclasses.each do |sklass|
78
+ classes << sklass.new
79
+ end
80
+ end
81
+ end
82
+ end
83
+
84
+ class Class
85
+ def subclasses
86
+ ObjectSpace.each_object(Class).select { |klass| klass < self }
87
+ end
88
+ end
@@ -1,19 +1,22 @@
1
1
  class BrandEinsSetup
2
+ attr_reader :pdf_tools
3
+
2
4
  def initialize
5
+ @pdf_tools = Array.new
6
+ end
7
+
8
+ def check_requirements
3
9
  puts 'Checking requirements for your system'
4
- if pdftk?
5
- puts "\n" + 'It seems you have pdftk installed on your system.'
10
+ if @pdf_tools.any? { |pdf_tool| pdf_tool.available? }
11
+ puts 'It seems you have at least one supported pdf tool installed. You are ready to go!'
6
12
  else
7
- puts "\n" + 'It seems you are missing pdfk on your system. You are ready to go!'
8
- puts pdfk_install_instructions
13
+ puts 'It seems you dont have any pdf tools installed or brandeins was not able to locate them.'
14
+ puts pdf_tool_instructions
9
15
  end
10
16
  end
11
17
 
12
- def pdfk_install_instructions
13
- 'Visit http://www.pdflabs.com/docs/install-pdftk/ to install pdftk on your system'
18
+ def add_pdf_tool(pdf_tool)
19
+ @pdf_tools << pdf_tool
14
20
  end
15
21
 
16
- def pdftk?
17
- if system('pdftk --version').nil? then false else true end
18
- end
19
22
  end
@@ -1,3 +1,3 @@
1
1
  module BrandEins
2
- VERSION = '0.0.16'
2
+ VERSION = '0.1.2'
3
3
  end
@@ -12,9 +12,10 @@ class TestBrandEinsDownload < MiniTest::Unit::TestCase
12
12
  end
13
13
 
14
14
  def test_tmp_directories_get_created
15
+ skip
15
16
  FakeFS do
16
- bdl = BrandEins::Downloader.new @dir
17
- assert File.directory?(File.expand_path("./#{@dir}/tmp"))
17
+ #bdl = BrandEins::Downloader.new @dir
18
+ #assert File.directory?(File.expand_path("./#{@dir}/tmp"))
18
19
  end
19
20
  end
20
21
 
@@ -35,4 +36,23 @@ class TestBrandEinsDownload < MiniTest::Unit::TestCase
35
36
  assert_equal magazine_links[0], (@base_url + '/magazin/nein-sagen.html')
36
37
  assert_equal magazine_links[1], (@base_url + '/magazin/markenkommunikation.html')
37
38
  end
39
+
40
+ def test_get_magazine_cover
41
+ html =<<-EOF
42
+ <li class="month_detail" id="month_detail_2012_4">
43
+ <dl>
44
+ <dt class="ausgabe">Ausgabe 4/2012</dt>
45
+ <dt class="titel">SCHWERPUNKT Kapitalismus</dt>
46
+ <dd class="cover">
47
+ <a href="magazin/kapitalismus.html" title="Zum Magazin brand eins Online 4 2012">
48
+ <img src="typo3temp/pics/08ff826417.jpg" width="235" height="311" alt="Ausgabe 04/2012 SCHWERPUNKT Kapitalismus"></a>
49
+ </dd>
50
+ </dl>
51
+ </li>
52
+ EOF
53
+
54
+ archive_site = BrandEins::Downloader::ArchiveSite.new @base_url, html
55
+ cover = archive_site.get_magazine_cover(2012, 4)
56
+ assert_equal cover, { :title => "SCHWERPUNKT Kapitalismus", :img_url => "#{@base_url}/typo3temp/pics/08ff826417.jpg" }
57
+ end
38
58
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: brandeins
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.16
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-08 00:00:00.000000000 Z
12
+ date: 2012-11-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -59,6 +59,22 @@ dependencies:
59
59
  - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: prawn
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
62
78
  description: BrandEins gem offers a download command to download a specific or all
63
79
  volumes. Use `brandeins help` to find out more about it
64
80
  email:
@@ -77,6 +93,7 @@ files:
77
93
  - bin/brandeins
78
94
  - brandeins.gemspec
79
95
  - lib/brandeins.rb
96
+ - lib/brandeins/pdf-tools.rb
80
97
  - lib/brandeins/setup.rb
81
98
  - lib/brandeins/version.rb
82
99
  - test/brandeins_test.rb
@@ -102,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
102
119
  version: '0'
103
120
  segments:
104
121
  - 0
105
- hash: 4032879917195506471
122
+ hash: 1148308714876892824
106
123
  required_rubygems_version: !ruby/object:Gem::Requirement
107
124
  none: false
108
125
  requirements:
@@ -111,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
128
  version: '0'
112
129
  segments:
113
130
  - 0
114
- hash: 4032879917195506471
131
+ hash: 1148308714876892824
115
132
  requirements: []
116
133
  rubyforge_project:
117
134
  rubygems_version: 1.8.24