brandeins 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,5 +1,4 @@
1
1
  .DS_Store
2
2
  *.gem
3
3
  .bundle
4
- Gemfile.lock
5
4
  pkg/*
data/Gemfile CHANGED
@@ -4,7 +4,8 @@ group :test do
4
4
  if ENV['RUBY_VERSION'][5,3] == '1.8'
5
5
  gem 'minitest'
6
6
  end
7
- gem 'fakefs'
7
+ gem 'webmock'
8
+ gem 'debugger'
8
9
  end
9
10
 
10
11
 
@@ -0,0 +1,50 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ brandeins (0.2.0)
5
+ nokogiri
6
+ prawn
7
+ rake
8
+ thor
9
+
10
+ GEM
11
+ remote: http://rubygems.org/
12
+ specs:
13
+ Ascii85 (1.0.2)
14
+ addressable (2.3.2)
15
+ afm (0.2.0)
16
+ columnize (0.3.6)
17
+ crack (0.3.2)
18
+ debugger (1.3.1)
19
+ columnize (>= 0.3.1)
20
+ debugger-linecache (~> 1.1.1)
21
+ debugger-ruby_core_source (~> 1.1.8)
22
+ debugger-linecache (1.1.2)
23
+ debugger-ruby_core_source (>= 1.1.1)
24
+ debugger-ruby_core_source (1.1.8)
25
+ hashery (2.1.0)
26
+ nokogiri (1.5.6)
27
+ pdf-reader (1.3.0)
28
+ Ascii85 (~> 1.0.0)
29
+ afm (~> 0.2.0)
30
+ hashery (~> 2.0)
31
+ ruby-rc4
32
+ ttfunk
33
+ prawn (0.12.0)
34
+ pdf-reader (>= 0.9.0)
35
+ ttfunk (~> 1.0.2)
36
+ rake (10.0.3)
37
+ ruby-rc4 (0.1.5)
38
+ thor (0.17.0)
39
+ ttfunk (1.0.3)
40
+ webmock (1.9.0)
41
+ addressable (>= 2.2.7)
42
+ crack (>= 0.1.7)
43
+
44
+ PLATFORMS
45
+ ruby
46
+
47
+ DEPENDENCIES
48
+ brandeins!
49
+ debugger
50
+ webmock
data/README.md CHANGED
@@ -12,7 +12,7 @@ several ruby libraries (that you can get through rubygems)
12
12
 
13
13
 
14
14
  ## Install
15
- `gem install brandeins-dl`
15
+ `gem install brandeins`
16
16
 
17
17
 
18
18
  ## Usage
@@ -23,4 +23,4 @@ Download just one magazine
23
23
 
24
24
  Download the whole collecion of a certain year
25
25
 
26
- `brandeins download_all --path=/Path/where/to/download/the/files --year=2011`
26
+ `brandeins download_all --path=/Path/where/to/download/the/files --year=2011 --all`
data/Rakefile CHANGED
@@ -3,7 +3,8 @@ require 'rake/testtask'
3
3
  require './lib/brandeins/version'
4
4
 
5
5
  Rake::TestTask.new do |t|
6
- t.pattern = 'test/*_test.rb'
6
+ t.test_files = FileList['test/*_test.rb', 'specs/*_spec.rb']
7
+ t.verbose = true
7
8
  end
8
9
 
9
10
  task :install do
@@ -15,4 +16,20 @@ task publish: [ :build ] do
15
16
  sh "gem push ./pkg/brandeins-#{BrandEins::VERSION}.gem"
16
17
  end
17
18
 
19
+ rule /^version:bump:(major|minor|patch)/ do |t|
20
+ sh "git status | grep 'nothing to commit'"
21
+ index = ['major', 'minor','patch'].index(t.name.split(':').last)
22
+ file = 'lib/brandeins/version.rb'
23
+
24
+ version_file = File.read(file)
25
+ old_version, *version_parts = version_file.match(/(\d+)\.(\d+)\.(\d+)/).to_a
26
+ version_parts[index] = version_parts[index].to_i + 1
27
+ version_parts[2] = 0 if index < 2
28
+ version_parts[1] = 0 if index < 1
29
+ new_version = version_parts * '.'
30
+ File.open(file,'w'){|f| f.write(version_file.sub(old_version, new_version)) }
31
+
32
+ sh "git add #{file} Gemfile.lock && git commit -m 'bump version to #{new_version}'"
33
+ end
34
+
18
35
  task :default => :test
@@ -1,312 +1,8 @@
1
- %w(
2
- brandeins/version
3
- brandeins/setup
4
- brandeins/pdf-tools
5
- nokogiri
6
- open-uri
7
- uri
8
- fileutils
9
- thor
10
- prawn
11
- ).each do |lib|
12
- begin
13
- require lib
14
- rescue Exception => e
15
- puts "missing #{lib}, #{e.inspect}"
16
- end
17
- end
1
+ # encoding: utf-8
18
2
 
19
- module BrandEins
3
+ require 'brandeins/version'
4
+ require 'brandeins/downloader'
5
+ require 'brandeins/setup'
6
+ require 'brandeins/cli'
20
7
 
21
- class CLI < Thor
22
- map '--version' => :version
23
-
24
- desc '--version', 'Displays current version'
25
- def version
26
- puts BrandEins::VERSION
27
- end
28
-
29
- desc 'download', 'Download past brand eins magazines (use `brandeins help download` to learn more about options)'
30
- method_option :path, :type => :string
31
- method_option :volume, :type => :numeric
32
- method_option :all
33
- method_option :year, :type => :numeric
34
- def download
35
- path = options.path ? File.expand_path(options.path) : Dir.pwd
36
- year = options.year || Time.new.year
37
- all = options.all
38
- volume = options.volume
39
-
40
- if volume.nil? && all.nil?
41
- puts "If you want to download a specific volune use the --volume flag or use --all to download all volumes of a year"
42
- else
43
- downloader = BrandEins::Downloader.new path
44
- if !all.nil?
45
- downloader.get_magazines_of_year year
46
- else
47
- downloader.get_magazine year, volume
48
- end
49
- end
50
- end
51
-
52
- desc 'setup', 'Checks if all requirements for using brandeins gem are met'
53
- method_option :help
54
- def setup
55
- setup = BrandEins::Setup.new
56
- if !options.help.nil?
57
- setup.help
58
- else
59
- setup.run
60
- end
61
- end
62
- end
63
-
64
- class Downloader
65
- attr_reader :archive
66
-
67
- def initialize(path)
68
- @url = 'http://www.brandeins.de'
69
- @archive = false
70
- @dl_dir = path
71
- @tmp_dir = path + '/brand-eins-tmp'
72
- @pdftool = BrandEins::PdfTools.get_pdf_tool
73
- create_tmp_dirs
74
- end
75
-
76
- def setup
77
- @archive = ArchiveSite.new @url
78
- end
79
-
80
- def get_magazines_of_year(year = 2000)
81
- setup
82
- puts "Getting all brand eins magazines of #{year}. This could take a while..."
83
- magazine_links_per_year = @archive.get_magazine_links_by_year(year)
84
- magazine_links_per_year.each_with_index do |magazine_link, index|
85
- volume = index+1
86
- puts "Parsing Volume #{volume} of #{year}"
87
- target_pdf = get_target_pdf(year, volume)
88
- get_magazine_by_link(magazine_link, target_pdf, year, volume)
89
- end
90
- end
91
-
92
- def get_magazine(year = 2000, volume = 1)
93
- setup
94
- puts "Parsing Volume #{volume} of #{year}"
95
- target_pdf = get_target_pdf(year, volume)
96
-
97
- magazine_links = @archive.get_magazine_links_by_year(year)
98
- target_magazine_link = magazine_links[volume-1]
99
-
100
- get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
101
- end
102
-
103
- private
104
- def create_tmp_dirs
105
- FileUtils.mkdir_p @tmp_dir unless File.directory?(@tmp_dir)
106
- end
107
-
108
- def get_magazine_by_link(target_magazine_link, target_pdf, year, volume)
109
- pdf_links = @archive.magazine_pdf_links(target_magazine_link)
110
- pdf_files = download_pdfs(pdf_links)
111
-
112
- pdf_cover = create_cover_pdf(year, volume)
113
- pdf_files = pdf_files.reverse.push(pdf_cover).reverse
114
-
115
- if !@pdftool.nil?
116
- target_pdf_path = "#{@dl_dir}/#{target_pdf}"
117
- @pdftool.merge_pdf_files(pdf_files, target_pdf_path)
118
- cleanup
119
- else
120
- if RUBY_PLATFORM.include? 'darwin'
121
- puts 'brandeins wont merge the single pdf files since it didnt find the pdftk tool'
122
- end
123
- end
124
- end
125
-
126
- def create_cover_pdf(year, volume)
127
- cover = @archive.get_magazine_cover(year, volume)
128
- cover_title = cover[:title]
129
- cover_img_url = cover[:img_url]
130
- cover_img_file = @tmp_dir + "/cover-#{year}-#{volume}.jpg"
131
- cover_pdf_file = @tmp_dir + "/cover-#{year}-#{volume}.pdf"
132
-
133
- File.open(cover_img_file,'w') do |f|
134
- uri = URI.parse(cover_img_url)
135
- Net::HTTP.start(uri.host,uri.port) do |http|
136
- http.request_get(uri.path) do |res|
137
- res.read_body do |seg|
138
- f << seg
139
- #hack -- adjust to suit:
140
- sleep 0.005
141
- end
142
- end
143
- end
144
- end
145
-
146
- require 'prawn'
147
- Prawn::Document.generate(cover_pdf_file) do |pdf|
148
- pdf.text "<font size='18'><b>" + cover_title + "</b></font>", :align => :center, :inline_format => true
149
- pdf.image cover_img_file, :position => :center, :vposition => :center
150
- end
151
- return cover_pdf_file
152
- end
153
-
154
- def get_target_pdf(year, volume)
155
- "Brand-Eins-#{year}-#{volume}.pdf"
156
- end
157
-
158
- def download_pdfs(pdf_links)
159
- pdf_downloader = PDFDownloader.new(pdf_links, @tmp_dir)
160
- pdf_downloader.download_all
161
- end
162
-
163
- def cleanup
164
- FileUtils.rm_r @tmp_dir
165
- end
166
-
167
- class PDFDownloader
168
-
169
- def initialize(pdf_links, dl_dir)
170
- @dl_dir = dl_dir
171
- @pdf_links = pdf_links
172
- end
173
-
174
- def download_all
175
- pdf_files = Array.new
176
- @pdf_links.each do |pdf_link|
177
- pdf_name = @dl_dir + '/' + File.basename(pdf_link)
178
- pdf_url = pdf_link
179
- download_pdf(pdf_url, pdf_name)
180
- pdf_files << pdf_name
181
- end
182
- pdf_files
183
- end
184
-
185
- private
186
-
187
- def download_pdf(pdf_url, filename)
188
- if File.exists? filename
189
- puts "File #{filename} seems to be already downloaded"
190
- return true
191
- end
192
-
193
- puts "Downloading PDF from #{pdf_url} to #{filename}"
194
- File.open(filename,'wb') do |new_file|
195
- open(pdf_url, 'rb') do |read_file|
196
- new_file.write(read_file.read)
197
- end
198
- end
199
- end
200
-
201
- end
202
-
203
- class ArchiveSite
204
- attr_accessor :doc
205
-
206
- def initialize(base_url, html = false)
207
- @base_url = base_url
208
- @archive_url = @base_url + "/archiv.html"
209
- if html
210
- @doc = Nokogiri::HTML(html)
211
- end
212
- end
213
-
214
- def setup
215
- return if defined?(@doc) != nil
216
- @doc = Nokogiri::HTML(open(@archive_url))
217
- end
218
-
219
- def get_magazine_links_by_year(year = 2000)
220
- setup
221
- puts "Loading Magazine from year #{year}"
222
- magazine_nodes_with_meta = @doc.css(".jahrgang-#{year} ul li")
223
- magazine_links = Array.new
224
- magazine_nodes_with_meta.each_with_index do |node, index|
225
- if node['id'].nil? then
226
- link = node.css('a')
227
- if link[0].nil? then
228
- next
229
- end
230
- href = link[0]['href']
231
- magazine_links << @base_url + '/' + href
232
- end
233
- end
234
- magazine_links
235
- end
236
-
237
- def get_magazine_cover(year, volume)
238
- title = @doc.css("#month_detail_#{year}_#{volume} .titel").children[0].to_s
239
- img_url = ''
240
- @doc.css("#month_detail_#{year}_#{volume} .cover a img").each do |node|
241
- img_url = node['src']
242
- end
243
- return { :title => title, :img_url => @base_url + '/' + img_url }
244
- end
245
-
246
- def magazine_pdf_links(url)
247
- magazine = ArchiveMagazine.new(url, @base_url)
248
- magazine.get_magazine_pdf_links
249
- end
250
-
251
- class ArchiveMagazine
252
- attr_accessor :url, :doc
253
-
254
- def initialize(url, base_url, html = false)
255
- puts "Parsing #{url}"
256
- @url = url
257
- @base_url = base_url
258
- @doc = Nokogiri::HTML(open(url))
259
- end
260
-
261
- def get_magazine_pdf_links
262
- [get_editorial_article_links, get_schwerpunkt_article_links].flatten
263
- end
264
-
265
- def get_schwerpunkt_article_links
266
- get_links("div.articleList ul h4 a")
267
- end
268
-
269
- def get_editorial_article_links
270
- get_links(".editorial-links li a")
271
- end
272
-
273
- def get_links(css_selector)
274
- pdf_links = Array.new
275
- link_nodes = @doc.css(css_selector)
276
- link_nodes.each do |node|
277
- article_link = @base_url + '/' + node['href']
278
- article = MagazineArticle.new(article_link)
279
- pdf_link = article.get_pdf_link
280
- if pdf_link.nil? then
281
- puts "------------------------------"
282
- puts "No Content for: #{article_link}"
283
- puts "------------------------------"
284
- else
285
- pdf_links << @base_url + '/' + pdf_link
286
- end
287
- end
288
- pdf_links
289
- end
290
-
291
- class MagazineArticle
292
- attr_accessor :url, :doc
293
-
294
- def initialize(url)
295
- puts "Parsing Article: #{url}"
296
- @url = url
297
- @doc = Nokogiri::HTML(open(url))
298
- end
299
-
300
- def get_pdf_link
301
- link = @doc.css("div#sidebar ul li#downloaden a")
302
- if link[0].nil? then
303
- return nil
304
- else
305
- return link[0]['href']
306
- end
307
- end
308
- end
309
- end
310
- end
311
- end
312
- end
8
+ module BrandEins; end
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+ require 'thor'
3
+
4
+ module BrandEins
5
+ class CLI < Thor
6
+ map '--version' => :version
7
+
8
+ desc '--version', 'Displays current version'
9
+ def version
10
+ puts BrandEins::VERSION
11
+ end
12
+
13
+ desc 'download', 'Download past brand eins magazines (use `brandeins help download` to learn more about options)'
14
+ method_option :path, :type => :string
15
+ method_option :volume, :type => :numeric
16
+ method_option :all
17
+ method_option :year, :type => :numeric
18
+ def download
19
+ path = options.path ? File.expand_path(options.path) : Dir.pwd
20
+ year = options.year || Time.new.year
21
+ all = options.all
22
+ volume = options.volume
23
+
24
+ if volume.nil? and all.nil?
25
+ puts "If you want to download a specific volune use the --volume flag or use --all to download all volumes of a year"
26
+ else
27
+ downloader = BrandEins::Downloader.new(path, verbose: true)
28
+ if !all.nil?
29
+ downloader.get_magazines_of_year year
30
+ else
31
+ downloader.get_magazine year, volume
32
+ end
33
+ end
34
+ end
35
+
36
+ desc 'setup', 'Checks if all requirements for using brandeins gem are met'
37
+ method_option :help
38
+ def setup
39
+ setup = BrandEins::Setup.new
40
+ if !options.help.nil?
41
+ setup.help
42
+ else
43
+ setup.run
44
+ end
45
+ end
46
+ end
47
+ end