brandeins 0.1.6 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,5 +1,4 @@
1
1
  .DS_Store
2
2
  *.gem
3
3
  .bundle
4
- Gemfile.lock
5
4
  pkg/*
data/Gemfile CHANGED
@@ -4,7 +4,8 @@ group :test do
4
4
  if ENV['RUBY_VERSION'][5,3] == '1.8'
5
5
  gem 'minitest'
6
6
  end
7
- gem 'fakefs'
7
+ gem 'webmock'
8
+ gem 'debugger'
8
9
  end
9
10
 
10
11
 
@@ -0,0 +1,50 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ brandeins (0.2.0)
5
+ nokogiri
6
+ prawn
7
+ rake
8
+ thor
9
+
10
+ GEM
11
+ remote: http://rubygems.org/
12
+ specs:
13
+ Ascii85 (1.0.2)
14
+ addressable (2.3.2)
15
+ afm (0.2.0)
16
+ columnize (0.3.6)
17
+ crack (0.3.2)
18
+ debugger (1.3.1)
19
+ columnize (>= 0.3.1)
20
+ debugger-linecache (~> 1.1.1)
21
+ debugger-ruby_core_source (~> 1.1.8)
22
+ debugger-linecache (1.1.2)
23
+ debugger-ruby_core_source (>= 1.1.1)
24
+ debugger-ruby_core_source (1.1.8)
25
+ hashery (2.1.0)
26
+ nokogiri (1.5.6)
27
+ pdf-reader (1.3.0)
28
+ Ascii85 (~> 1.0.0)
29
+ afm (~> 0.2.0)
30
+ hashery (~> 2.0)
31
+ ruby-rc4
32
+ ttfunk
33
+ prawn (0.12.0)
34
+ pdf-reader (>= 0.9.0)
35
+ ttfunk (~> 1.0.2)
36
+ rake (10.0.3)
37
+ ruby-rc4 (0.1.5)
38
+ thor (0.17.0)
39
+ ttfunk (1.0.3)
40
+ webmock (1.9.0)
41
+ addressable (>= 2.2.7)
42
+ crack (>= 0.1.7)
43
+
44
+ PLATFORMS
45
+ ruby
46
+
47
+ DEPENDENCIES
48
+ brandeins!
49
+ debugger
50
+ webmock
data/README.md CHANGED
@@ -12,7 +12,7 @@ several ruby libraries (that you can get through rubygems)
12
12
 
13
13
 
14
14
  ## Install
15
- `gem install brandeins-dl`
15
+ `gem install brandeins`
16
16
 
17
17
 
18
18
  ## Usage
@@ -23,4 +23,4 @@ Download just one magazine
23
23
 
24
24
  Download the whole collection of a certain year
25
25
 
26
- `brandeins download_all --path=/Path/where/to/download/the/files --year=2011`
26
+ `brandeins download_all --path=/Path/where/to/download/the/files --year=2011 --all`
data/Rakefile CHANGED
@@ -3,7 +3,8 @@ require 'rake/testtask'
3
3
  require './lib/brandeins/version'
4
4
 
5
5
  Rake::TestTask.new do |t|
6
- t.pattern = 'test/*_test.rb'
6
+ t.test_files = FileList['test/*_test.rb', 'specs/*_spec.rb']
7
+ t.verbose = true
7
8
  end
8
9
 
9
10
  task :install do
@@ -15,4 +16,20 @@ task publish: [ :build ] do
15
16
  sh "gem push ./pkg/brandeins-#{BrandEins::VERSION}.gem"
16
17
  end
17
18
 
19
+ rule /^version:bump:(major|minor|patch)/ do |t|
20
+ sh "git status | grep 'nothing to commit'"
21
+ index = ['major', 'minor','patch'].index(t.name.split(':').last)
22
+ file = 'lib/brandeins/version.rb'
23
+
24
+ version_file = File.read(file)
25
+ old_version, *version_parts = version_file.match(/(\d+)\.(\d+)\.(\d+)/).to_a
26
+ version_parts[index] = version_parts[index].to_i + 1
27
+ version_parts[2] = 0 if index < 2
28
+ version_parts[1] = 0 if index < 1
29
+ new_version = version_parts * '.'
30
+ File.open(file,'w'){|f| f.write(version_file.sub(old_version, new_version)) }
31
+
32
+ sh "git add #{file} Gemfile.lock && git commit -m 'bump version to #{new_version}'"
33
+ end
34
+
18
35
  task :default => :test
@@ -1,312 +1,8 @@
1
- %w(
2
- brandeins/version
3
- brandeins/setup
4
- brandeins/pdf-tools
5
- nokogiri
6
- open-uri
7
- uri
8
- fileutils
9
- thor
10
- prawn
11
- ).each do |lib|
12
- begin
13
- require lib
14
- rescue Exception => e
15
- puts "missing #{lib}, #{e.inspect}"
16
- end
17
- end
1
+ # encoding: utf-8
18
2
 
19
- module BrandEins
3
+ require 'brandeins/version'
4
+ require 'brandeins/downloader'
5
+ require 'brandeins/setup'
6
+ require 'brandeins/cli'
20
7
 
21
- class CLI < Thor
22
- map '--version' => :version
23
-
24
- desc '--version', 'Displays current version'
25
- def version
26
- puts BrandEins::VERSION
27
- end
28
-
29
- desc 'download', 'Download past brand eins magazines (use `brandeins help download` to learn more about options)'
30
- method_option :path, :type => :string
31
- method_option :volume, :type => :numeric
32
- method_option :all
33
- method_option :year, :type => :numeric
34
- def download
35
- path = options.path ? File.expand_path(options.path) : Dir.pwd
36
- year = options.year || Time.new.year
37
- all = options.all
38
- volume = options.volume
39
-
40
- if volume.nil? && all.nil?
41
- puts "If you want to download a specific volune use the --volume flag or use --all to download all volumes of a year"
42
- else
43
- downloader = BrandEins::Downloader.new path
44
- if !all.nil?
45
- downloader.get_magazines_of_year year
46
- else
47
- downloader.get_magazine year, volume
48
- end
49
- end
50
- end
51
-
52
- desc 'setup', 'Checks if all requirements for using brandeins gem are met'
53
- method_option :help
54
- def setup
55
- setup = BrandEins::Setup.new
56
- if !options.help.nil?
57
- setup.help
58
- else
59
- setup.run
60
- end
61
- end
62
- end
63
-
64
- class Downloader
65
- attr_reader :archive
66
-
67
- def initialize(path)
68
- @url = 'http://www.brandeins.de'
69
- @archive = false
70
- @dl_dir = path
71
- @tmp_dir = path + '/brand-eins-tmp'
72
- @pdftool = BrandEins::PdfTools.get_pdf_tool
73
- create_tmp_dirs
74
- end
75
-
76
- def setup
77
- @archive = ArchiveSite.new @url
78
- end
79
-
80
- def get_magazines_of_year(year = 2000)
81
- setup
82
- puts "Getting all brand eins magazines of #{year}. This could take a while..."
83
- magazine_links_per_year = @archive.get_magazine_links_by_year(year)
84
- magazine_links_per_year.each_with_index do |magazine_link, index|
85
- volume = index+1
86
- puts "Parsing Volume #{volume} of #{year}"
87
- target_pdf = get_target_pdf(year, volume)
88
- get_magazine_by_link(magazine_link, target_pdf, year, volume)
89
- end
90
- end
91
-
92
- def get_magazine(year = 2000, volume = 1)
93
- setup
94
- puts "Parsing Volume #{volume} of #{year}"
95
- target_pdf = get_target_pdf(year, volume)
96
-
97
- magazine_links = @archive.get_magazine_links_by_year(year)
98
- target_magazine_link = magazine_links[volume-1]
99
-
100
- get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
101
- end
102
-
103
- private
104
- def create_tmp_dirs
105
- FileUtils.mkdir_p @tmp_dir unless File.directory?(@tmp_dir)
106
- end
107
-
108
- def get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
109
- pdf_links = @archive.magazine_pdf_links(target_magazine_link)
110
- pdf_files = download_pdfs(pdf_links)
111
-
112
- pdf_cover = create_cover_pdf(year, volume)
113
- pdf_files = pdf_files.reverse.push(pdf_cover).reverse
114
-
115
- if !@pdftool.nil?
116
- target_pdf_path = "#{@dl_dir}/#{target_pdf}"
117
- @pdftool.merge_pdf_files(pdf_files, target_pdf_path)
118
- cleanup
119
- else
120
- if RUBY_PLATFORM.include? 'darwin'
121
- puts 'brandeins wont merge the single pdf files since it didnt find the pdftk tool'
122
- end
123
- end
124
- end
125
-
126
- def create_cover_pdf(year, volume)
127
- cover = @archive.get_magazine_cover(year, volume)
128
- cover_title = cover[:title]
129
- cover_img_url = cover[:img_url]
130
- cover_img_file = @tmp_dir + "/cover-#{year}-#{volume}.jpg"
131
- cover_pdf_file = @tmp_dir + "/cover-#{year}-#{volume}.pdf"
132
-
133
- File.open(cover_img_file,'w') do |f|
134
- uri = URI.parse(cover_img_url)
135
- Net::HTTP.start(uri.host,uri.port) do |http|
136
- http.request_get(uri.path) do |res|
137
- res.read_body do |seg|
138
- f << seg
139
- #hack -- adjust to suit:
140
- sleep 0.005
141
- end
142
- end
143
- end
144
- end
145
-
146
- require 'prawn'
147
- Prawn::Document.generate(cover_pdf_file) do |pdf|
148
- pdf.text "<font size='18'><b>" + cover_title + "</b></font>", :align => :center, :inline_format => true
149
- pdf.image cover_img_file, :position => :center, :vposition => :center
150
- end
151
- return cover_pdf_file
152
- end
153
-
154
- def get_target_pdf(year, volume)
155
- "Brand-Eins-#{year}-#{volume}.pdf"
156
- end
157
-
158
- def download_pdfs(pdf_links)
159
- pdf_downloader = PDFDownloader.new(pdf_links, @tmp_dir)
160
- pdf_downloader.download_all
161
- end
162
-
163
- def cleanup
164
- FileUtils.rm_r @tmp_dir
165
- end
166
-
167
- class PDFDownloader
168
-
169
- def initialize(pdf_links, dl_dir)
170
- @dl_dir = dl_dir
171
- @pdf_links = pdf_links
172
- end
173
-
174
- def download_all
175
- pdf_files = Array.new
176
- @pdf_links.each do |pdf_link|
177
- pdf_name = @dl_dir + '/' + File.basename(pdf_link)
178
- pdf_url = pdf_link
179
- download_pdf(pdf_url, pdf_name)
180
- pdf_files << pdf_name
181
- end
182
- pdf_files
183
- end
184
-
185
- private
186
-
187
- def download_pdf(pdf_url, filename)
188
- if File.exists? filename
189
- puts "File #{filename} seems to be already downloaded"
190
- return true
191
- end
192
-
193
- puts "Downloading PDF from #{pdf_url} to #{filename}"
194
- File.open(filename,'wb') do |new_file|
195
- open(pdf_url, 'rb') do |read_file|
196
- new_file.write(read_file.read)
197
- end
198
- end
199
- end
200
-
201
- end
202
-
203
- class ArchiveSite
204
- attr_accessor :doc
205
-
206
- def initialize(base_url, html = false)
207
- @base_url = base_url
208
- @archive_url = @base_url + "/archiv.html"
209
- if html
210
- @doc = Nokogiri::HTML(html)
211
- end
212
- end
213
-
214
- def setup
215
- return if defined?(@doc) != nil
216
- @doc = Nokogiri::HTML(open(@archive_url))
217
- end
218
-
219
- def get_magazine_links_by_year(year = 2000)
220
- setup
221
- puts "Loading Magazine from year #{year}"
222
- magazine_nodes_with_meta = @doc.css(".jahrgang-#{year} ul li")
223
- magazine_links = Array.new
224
- magazine_nodes_with_meta.each_with_index do |node, index|
225
- if node['id'].nil? then
226
- link = node.css('a')
227
- if link[0].nil? then
228
- next
229
- end
230
- href = link[0]['href']
231
- magazine_links << @base_url + '/' + href
232
- end
233
- end
234
- magazine_links
235
- end
236
-
237
- def get_magazine_cover(year, volume)
238
- title = @doc.css("#month_detail_#{year}_#{volume} .titel").children[0].to_s
239
- img_url = ''
240
- @doc.css("#month_detail_#{year}_#{volume} .cover a img").each do |node|
241
- img_url = node['src']
242
- end
243
- return { :title => title, :img_url => @base_url + '/' + img_url }
244
- end
245
-
246
- def magazine_pdf_links(url)
247
- magazine = ArchiveMagazine.new(url, @base_url)
248
- magazine.get_magazine_pdf_links
249
- end
250
-
251
- class ArchiveMagazine
252
- attr_accessor :url, :doc
253
-
254
- def initialize(url, base_url, html = false)
255
- puts "Parsing #{url}"
256
- @url = url
257
- @base_url = base_url
258
- @doc = Nokogiri::HTML(open(url))
259
- end
260
-
261
- def get_magazine_pdf_links
262
- [get_editorial_article_links, get_schwerpunkt_article_links].flatten
263
- end
264
-
265
- def get_schwerpunkt_article_links
266
- get_links("div.articleList ul h4 a")
267
- end
268
-
269
- def get_editorial_article_links
270
- get_links(".editorial-links li a")
271
- end
272
-
273
- def get_links(css_selector)
274
- pdf_links = Array.new
275
- link_nodes = @doc.css(css_selector)
276
- link_nodes.each do |node|
277
- article_link = @base_url + '/' + node['href']
278
- article = MagazineArticle.new(article_link)
279
- pdf_link = article.get_pdf_link
280
- if pdf_link.nil? then
281
- puts "------------------------------"
282
- puts "No Content for: #{article_link}"
283
- puts "------------------------------"
284
- else
285
- pdf_links << @base_url + '/' + pdf_link
286
- end
287
- end
288
- pdf_links
289
- end
290
-
291
- class MagazineArticle
292
- attr_accessor :url, :doc
293
-
294
- def initialize(url)
295
- puts "Parsing Article: #{url}"
296
- @url = url
297
- @doc = Nokogiri::HTML(open(url))
298
- end
299
-
300
- def get_pdf_link
301
- link = @doc.css("div#sidebar ul li#downloaden a")
302
- if link[0].nil? then
303
- return nil
304
- else
305
- return link[0]['href']
306
- end
307
- end
308
- end
309
- end
310
- end
311
- end
312
- end
8
+ module BrandEins; end
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+ require 'thor'
3
+
4
+ module BrandEins
5
+ class CLI < Thor
6
+ map '--version' => :version
7
+
8
+ desc '--version', 'Displays current version'
9
+ def version
10
+ puts BrandEins::VERSION
11
+ end
12
+
13
+ desc 'download', 'Download past brand eins magazines (use `brandeins help download` to learn more about options)'
14
+ method_option :path, :type => :string
15
+ method_option :volume, :type => :numeric
16
+ method_option :all
17
+ method_option :year, :type => :numeric
18
+ def download
19
+ path = options.path ? File.expand_path(options.path) : Dir.pwd
20
+ year = options.year || Time.new.year
21
+ all = options.all
22
+ volume = options.volume
23
+
24
+ if volume.nil? and all.nil?
25
+ puts "If you want to download a specific volune use the --volume flag or use --all to download all volumes of a year"
26
+ else
27
+ downloader = BrandEins::Downloader.new(path, verbose: true)
28
+ if !all.nil?
29
+ downloader.get_magazines_of_year year
30
+ else
31
+ downloader.get_magazine year, volume
32
+ end
33
+ end
34
+ end
35
+
36
+ desc 'setup', 'Checks if all requirements for using brandeins gem are met'
37
+ method_option :help
38
+ def setup
39
+ setup = BrandEins::Setup.new
40
+ if !options.help.nil?
41
+ setup.help
42
+ else
43
+ setup.run
44
+ end
45
+ end
46
+ end
47
+ end