brandeins 0.2.2 → 0.3.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -1
  3. data/.rspec +2 -0
  4. data/.rubocop.yml +5 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +11 -0
  7. data/Gemfile +7 -4
  8. data/Gemfile.lock +47 -21
  9. data/NOTES.md +6 -0
  10. data/Rakefile +15 -8
  11. data/bin/brandeins +3 -1
  12. data/brandeins.gemspec +0 -1
  13. data/lib/brandeins.rb +3 -5
  14. data/lib/brandeins/cli.rb +46 -34
  15. data/lib/brandeins/config.rb +18 -0
  16. data/lib/brandeins/kiosk.rb +100 -0
  17. data/lib/brandeins/merger/external/base.rb +16 -6
  18. data/lib/brandeins/merger/pdf_tools.rb +3 -6
  19. data/lib/brandeins/pages/archive.rb +91 -0
  20. data/lib/brandeins/pages/article.rb +37 -0
  21. data/lib/brandeins/pages/cover.rb +67 -0
  22. data/lib/brandeins/pages/magazine.rb +149 -0
  23. data/lib/brandeins/utils/cli_option_parser.rb +40 -0
  24. data/lib/brandeins/utils/cli_output.rb +100 -0
  25. data/lib/brandeins/utils/fetcher.rb +115 -0
  26. data/lib/brandeins/utils/merger.rb +41 -0
  27. data/lib/brandeins/version.rb +1 -1
  28. data/rubocop-todo.yml +141 -0
  29. data/spec/lib/brandeins/kiosk_spec.rb +66 -0
  30. data/spec/lib/brandeins/pages/archive_spec.rb +40 -0
  31. data/spec/lib/brandeins/pages/article_spec.rb +23 -0
  32. data/spec/lib/brandeins/pages/magazine_spec.rb +91 -0
  33. data/spec/lib/brandeins/utils/fetcher_spec.rb +8 -0
  34. data/spec/lib/brandeins_spec.rb +19 -0
  35. data/spec/spec_helper.rb +23 -0
  36. data/spec/support/capture_stdout.rb +12 -0
  37. data/spec/support/fixtures/archive.html +2365 -0
  38. data/spec/support/fixtures/artikel-masskonfektion-aus-plastik.html +254 -0
  39. data/spec/support/fixtures/artikel-schauspieler-daenemark.html +247 -0
  40. data/{test_support → spec/support}/fixtures/cover.jpg +0 -0
  41. data/spec/support/fixtures/editorial.html +236 -0
  42. data/spec/support/fixtures/just-a.pdf +0 -0
  43. data/spec/support/fixtures/magazine-1-2013.html +242 -0
  44. data/spec/support/fixtures/magazine-cover-fallback.html +1610 -0
  45. data/spec/support/fixtures/magazine-with-cover.html +1416 -0
  46. metadata +68 -61
  47. data/.rvmrc +0 -48
  48. data/lib/brandeins/downloader.rb +0 -111
  49. data/lib/brandeins/errors.rb +0 -5
  50. data/lib/brandeins/parser/archive_site.rb +0 -54
  51. data/lib/brandeins/parser/article_site.rb +0 -26
  52. data/lib/brandeins/parser/magazine_site.rb +0 -49
  53. data/lib/brandeins/setup.rb +0 -38
  54. data/specs/brandeins_spec.rb +0 -52
  55. data/specs/spec_helper.rb +0 -1
  56. data/test/brandeins_test.rb +0 -65
  57. data/test/helper.rb +0 -1
  58. data/test_support/capture_stdout.rb +0 -12
  59. data/test_support/fixtures/brandeins_archiv.html +0 -50
@@ -0,0 +1,100 @@
1
+ require 'singleton'
2
+ require 'io/console'
3
+
4
module BrandEins
  module Utils
    # Process-wide console writer used for leveled output (debug, info,
    # warning) and for transient status lines that are erased once the
    # wrapped work is done.
    #
    # Verbosity flags are only populated once #set_options has been called;
    # until then every level predicate is falsy and nothing is printed.
    class CliOutput
      include Singleton

      # Width used when the terminal size is unknown or unusable.
      DEFAULT_CONSOLE_WIDTH = 80
      # Characters reserved when shortening: ellipsis, tail and some margin.
      TRUNCATION_RESERVE = 12
      # Number of trailing characters kept when a message is shortened.
      TRUNCATION_TAIL = 6

      def initialize
        height, width = detect_console_size
        @console_size = { console_height: height, console_width: width }
        @opts = @console_size.dup
      end

      # The stream every message is written to.
      def out
        $stdout
      end

      # Replaces the current options with the defaults overlaid by +opts+.
      # The console size detected at construction time is kept unless +opts+
      # overrides it explicitly.
      #
      # @param opts [Hash] any of :debug, :info, :warning, :error,
      #   :console_width, :console_height
      # @return [Hash] the effective options
      def set_options(opts)
        @opts = default_opts.merge(@console_size).merge(opts)
      end

      def debug?
        @opts[:debug]
      end

      def info?
        @opts[:info]
      end

      def warning?
        @opts[:warning]
      end

      def error?
        @opts[:error]
      end

      # Verbosity flags applied by #set_options when not given explicitly.
      def default_opts
        {
          info: true,
          warning: true,
          error: true,
          debug: false
        }
      end

      # Moves the cursor to the start of the line and erases it.
      def clear_line
        out.write "\r"
        out.write "\e[K"
        out.flush
      end

      # Writes +msg+ without a trailing newline. The message is shortened to
      # the console width unless opts[:full_length] is truthy.
      def print(msg, opts = {})
        msg = conformize(msg) unless opts[:full_length]
        out.write(msg)
        out.flush
      end

      # Like #print, but guarantees the message ends with a newline.
      def println(msg, opts = {})
        msg += "\n" unless msg.end_with? "\n"
        print(msg, opts)
      end

      # @return [Integer] the usable console width; falls back to
      #   DEFAULT_CONSOLE_WIDTH when the width is unknown, zero or negative
      def console_width
        width = @opts[:console_width]
        width.is_a?(Integer) && width > 0 ? width : DEFAULT_CONSOLE_WIDTH
      end

      # Moves the cursor up one line.
      def line_up
        out.write "\e[A"
        out.flush
      end

      # Shortens +msg+ so it fits on one console line by keeping its head and
      # its last few characters, joined by an ellipsis. Messages shorter than
      # the console width are returned unchanged.
      def conformize(msg)
        return msg if msg.size < console_width

        # Clamp so that very narrow consoles cannot produce a negative length.
        head_length = [console_width - TRUNCATION_RESERVE, 0].max
        tail_length = [TRUNCATION_TAIL, msg.length].min
        msg[0, head_length] + '…' + msg[-tail_length, tail_length]
      end

      # Prints +msg+ (never shortened) when debug output is enabled, then runs
      # the optional block.
      #
      # @return the block's value, or nil without a block
      def debug(msg)
        println(msg, full_length: true) if debug?
        yield if block_given?
      end

      # Prints +msg+ when info output is enabled, then runs the optional block.
      #
      # @return the block's value, or nil without a block
      def info(msg)
        println msg if info?
        yield if block_given?
      end

      # Shows +msg+ as a transient status while the block runs and erases it
      # afterwards, even when the block raises.
      #
      # @return the block's value, or nil without a block
      def statusline(msg)
        print msg if info?
        yield if block_given?
      ensure
        clear_line if info?
      end

      # Prints +msg+ when warning output is enabled.
      def warning(msg)
        println msg if warning?
      end

      private

      # Asks the output stream for its [rows, columns]. Returns [nil, nil]
      # when the stream is not a terminal (pipe, file, in-memory buffer) or
      # the platform cannot report a size.
      def detect_console_size
        return [nil, nil] unless out.respond_to?(:winsize)

        out.winsize
      rescue SystemCallError, IOError, NotImplementedError
        [nil, nil]
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ # encoding: utf-8
2
+
3
+ require 'uri'
4
+ require 'net/http'
5
+ require 'singleton'
6
+ require 'pathname'
7
+ require 'digest/md5'
8
+
9
+ require_relative '../config'
10
+ require_relative 'cli_output'
11
+
12
module BrandEins
  module Utils

    # Singleton that downloads resources over HTTP and keeps a copy of every
    # response body in a cache directory on disk.
    #
    # The cache directory and the size limit are read from BrandEins::Config
    # ('cache_path' and 'cache_limit_bytes'). An in-memory index maps each
    # cached file path (String) to its modification time.
    class Fetcher
      include Singleton

      # Raised when the server answers with anything other than 200 OK.
      ContentNotFetchedError = Class.new StandardError

      # Returns the body for +url+, served from the cache when available and
      # downloaded (and cached) otherwise. When the cache exceeds its size
      # limit, the oldest cached file is evicted first.
      #
      # @param url [String]
      # @return [String] the raw response body
      # @raise [ContentNotFetchedError] if the download does not return 200 OK
      def fetch(url)
        remove_oldest_cache_file if cache_limit_reached?
        if cache_has_file?(url)
          cli.debug "Fetching file from cache: #{url}" do
            file_from_cache(url)
          end
        else
          cli.debug "Fetching file from url: #{url}" do
            http_get_resource(url)
          end
        end
      end

      # Downloads +url+, stores the body in the cache and returns it.
      # Redirects are not followed: any non-200 response is an error.
      #
      # @raise [ContentNotFetchedError] if the response is not 200 OK
      def http_get_resource(url)
        cli.statusline "Fetching: #{url}" do
          resp = Net::HTTP.get_response(URI.parse(url))
          unless resp.is_a?(Net::HTTPOK)
            raise ContentNotFetchedError, "Received #{resp.code}: #{resp.msg}"
          end
          write_to_cache(url, resp.body)
          resp.body
        end
      end

      # Writes +file_content+ to the cache file derived from +file_name+ and
      # registers it in the cache index.
      #
      # @return [Boolean] true once the file has been written
      def write_to_cache(file_name, file_content)
        cache_file_path = cache_path_for_file_name(file_name)
        written = false
        cli.debug "Writing file to cache: #{cache_file_path}" do
          written = !!File.binwrite(cache_file_path, file_content)
          add_to_cache(file_name, file_content)
        end
        written
      end

      # Reads the cached copy of +file_name+ as binary data.
      def file_from_cache(file_name)
        cache_file_path = cache_path_for_file_name(file_name)
        cli.statusline "Reading file from cache: #{file_name}" do
          File.binread(cache_file_path)
        end
      end

      # @return [Boolean] whether the cache index knows a file for +file_name+
      def cache_has_file?(file_name)
        # Index keys are Strings, so the lookup key must be a String as well.
        cache_files.key? cache_path_for_file_name(file_name).to_s
      end

      # @return [Pathname] location of the cache file for +file_name+
      def cache_path_for_file_name(file_name)
        Pathname.new(cache_path) + escaped_file_name(file_name)
      end

      # Builds the cache file name from the URL's host and the last segment
      # of its path, joined by '-'.
      # NOTE(review): URLs that share host and last path segment map to the
      # same cache file; the query string is ignored as well.
      def escaped_file_name(file_name)
        uri = URI.parse(file_name)
        [uri.host, File.basename(uri.path)].compact.join('-')
      end

      def cache_limit_in_bytes
        BrandEins::Config['cache_limit_bytes']
      end

      def cache_path
        BrandEins::Config['cache_path']
      end

      def cache_limit_reached?
        cache_size_in_bytes > cache_limit_in_bytes
      end

      # Sums the on-disk size of all indexed files; files that are missing or
      # empty count as zero bytes.
      def cache_size_in_bytes
        cache_files.keys.reduce(0) { |sum, file| sum + (File.size?(file) || 0) }
      end

      # Deletes the cached file with the oldest modification time and drops
      # it from the index. Does nothing when the cache is empty.
      def remove_oldest_cache_file
        return if cache_files.empty?

        oldest_file = cache_files.min_by { |_, time| time }.first
        cli.debug "Removing cached file: #{oldest_file}" do
          # The file may already be gone from disk; the index entry is still
          # removed so both stay in sync.
          File.delete(oldest_file) if File.exist?(oldest_file)
          cache_files.delete(oldest_file)
        end
      end

      # Registers the cache file for +file_name+ in the index. +file_content+
      # is accepted for interface compatibility but not used.
      def add_to_cache(file_name, file_content)
        cache_file_path = cache_path_for_file_name(file_name)
        cache_files[cache_file_path.to_s] = File.mtime(cache_file_path)
      end

      # Index of cached files (path String => mtime), built once from the
      # contents of the cache directory.
      def cache_files
        @cache_files ||= begin
          pattern = (Pathname.new(cache_path) + '*').to_s
          Hash[Dir[pattern].map { |f| [f, File.mtime(f)] }]
        end
      end

      def cli
        @cli ||= BrandEins::Utils::CliOutput.instance
      end

    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'singleton'
2
+ require 'prawn'
3
+
4
+ require_relative './cli_output'
5
+
6
module BrandEins
  module Utils
    # Singleton that concatenates several PDF files into a single document
    # using Prawn's template support.
    class Merger
      include Singleton

      # Writes a new PDF to +target_pdf+ containing every page of every file
      # in +pdf_file_paths+, in the given order. Paths that do not exist on
      # disk are skipped.
      #
      # NOTE(review): relies on Prawn's `template:` / `template_page:`
      # options; these appear to have been moved out of Prawn core in later
      # releases - confirm against the Prawn version pinned by the project.
      #
      # @param pdf_file_paths [Array<String>] source PDF files
      # @param target_pdf [String] path of the merged document
      def merge_pdf_files(pdf_file_paths, target_pdf)
        cli.info "Merging pdf files to: #{target_pdf}" do
          Prawn::Document.generate(target_pdf, pdf_options) do |pdf|
            pdf_file_paths.each do |pdf_file|
              # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
              next unless File.exist?(pdf_file)

              page_count = Prawn::Document.new(template: pdf_file).page_count
              1.upto(page_count) do |page_number|
                pdf.start_new_page(template: pdf_file, template_page: page_number)
              end
            end
          end
        end
      end

      # Options for the merged document. Page creation is skipped so the
      # document starts empty and only contains the imported pages.
      def pdf_options
        {
          page_size: 'A4',
          skip_page_creation: true
        }
      end

      def cli
        @cli ||= BrandEins::Utils::CliOutput.instance
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module BrandEins
2
- VERSION = '0.2.2'
2
+ VERSION = '0.3.0.pre'
3
3
  end
@@ -0,0 +1,141 @@
1
+ # This configuration was generated by `rubocop --auto-gen-config`.
2
+ # The point is for the user to remove these configuration records
3
+ # one by one as the offences are removed from the code base.
4
+
5
+ AccessControl:
6
+ Enabled: false
7
+
8
+ AlignParameters:
9
+ Enabled: false
10
+
11
+ AndOr:
12
+ Enabled: false
13
+
14
+ AssignmentInCondition:
15
+ Enabled: false
16
+
17
+ AvoidClassVars:
18
+ Enabled: false
19
+
20
+ AvoidFor:
21
+ Enabled: false
22
+
23
+ AvoidPerlBackrefs:
24
+ Enabled: false
25
+
26
+ BlockAlignment:
27
+ Enabled: false
28
+
29
+ BlockNesting:
30
+ Enabled: false
31
+
32
+ Blocks:
33
+ Enabled: false
34
+
35
+ CaseEquality:
36
+ Enabled: false
37
+
38
+ CaseIndentation:
39
+ Enabled: false
40
+
41
+ CollectionMethods:
42
+ Enabled: false
43
+
44
+ Documentation:
45
+ Enabled: false
46
+
47
+ EmptyLineBetweenDefs:
48
+ Enabled: false
49
+
50
+ EmptyLines:
51
+ Enabled: false
52
+
53
+ EmptyLiteral:
54
+ Enabled: false
55
+
56
+ FavorUnlessOverNegatedIf:
57
+ Enabled: false
58
+
59
+ HashSyntax:
60
+ Enabled: false
61
+
62
+ IfUnlessModifier:
63
+ Enabled: false
64
+
65
+ LeadingCommentSpace:
66
+ Enabled: false
67
+
68
+ LineLength:
69
+ Enabled: false
70
+
71
+ LiteralInCondition:
72
+ Enabled: false
73
+
74
+ MethodLength:
75
+ Enabled: false
76
+
77
+ MultilineTernaryOperator:
78
+ Enabled: false
79
+
80
+ NumericLiterals:
81
+ Enabled: false
82
+
83
+ ParenthesesAroundCondition:
84
+ Enabled: false
85
+
86
+ RedundantReturn:
87
+ Enabled: false
88
+
89
+ RedundantSelf:
90
+ Enabled: false
91
+
92
+ RescueException:
93
+ Enabled: false
94
+
95
+ Semicolon:
96
+ Enabled: false
97
+
98
+ ShadowingOuterLocalVariable:
99
+ Enabled: false
100
+
101
+ SingleLineMethods:
102
+ Enabled: false
103
+
104
+ SpaceAfterComma:
105
+ Enabled: false
106
+
107
+ SpaceAroundBraces:
108
+ Enabled: false
109
+
110
+ SpaceAroundEqualsInParameterDefault:
111
+ Enabled: false
112
+
113
+ SpaceAroundOperators:
114
+ Enabled: false
115
+
116
+ SpaceInsideBrackets:
117
+ Enabled: false
118
+
119
+ SpaceInsideParens:
120
+ Enabled: false
121
+
122
+ StringLiterals:
123
+ Enabled: false
124
+
125
+ Syntax:
126
+ Enabled: false
127
+
128
+ TrailingWhitespace:
129
+ Enabled: false
130
+
131
+ UnlessElse:
132
+ Enabled: false
133
+
134
+ UnusedLocalVariable:
135
+ Enabled: false
136
+
137
+ UselessAssignment:
138
+ Enabled: false
139
+
140
+ WordArray:
141
+ Enabled: false
@@ -0,0 +1,66 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../../spec_helper'
4
+
5
+ require_lib 'brandeins'
6
+ require_lib 'brandeins/config'
7
+ require_lib 'brandeins/kiosk'
8
+
9
+ require 'tempfile'
10
+ require 'pathname'
11
+
12
describe BrandEins::Kiosk do
  describe '#initialize(options)' do
    it 'takes :path from options' do
      path = Tempfile.new('test').path
      kiosk = BrandEins::Kiosk.new(path: path)
      expect(kiosk.target_path).to eq path
    end

    it 'defaults to the current working directory if no path is given' do
      kiosk = BrandEins::Kiosk.new
      cwd = Pathname.new('.').realpath.to_s
      expect(kiosk.target_path).to eq cwd
    end

    it 'raises InvalidPathError if the given path is not accessible' do
      # NOTE(review): presumably relies on the suite not running as root,
      # since '/root' would be accessible then - confirm for CI.
      invalid_path = '/root'
      expect {
        BrandEins::Kiosk.new(path: invalid_path)
      }.to raise_error BrandEins::Kiosk::InvalidPathError
    end
  end

  describe '#fetch_magazine(month: 1, year: 2013)' do
    it 'returns a magazine object' do
      # Every page the kiosk requests is served from a local fixture.
      archive_html = load_fixture 'archive.html'
      stub_request(:get, BrandEins::Config['archive_uri']).
        to_return(body: archive_html)

      magazine_html = load_fixture 'magazine-1-2013.html'
      stub_request(:get, 'http://www.brandeins.de/archiv/2013/neugier.html').
        to_return(status: 200, body: magazine_html)

      article_masskonfektion_html = load_fixture 'artikel-masskonfektion-aus-plastik.html'
      stub_request(:get, 'http://www.brandeins.de/archiv/2013/neugier/masskonfektion-aus-plastik.html').
        to_return(status: 200, body: article_masskonfektion_html)

      article_daenemark_html = load_fixture 'artikel-schauspieler-daenemark.html'
      stub_request(:get, 'http://www.brandeins.de/archiv/2013/neugier/ein-schauspieler-in-daenemark.html').
        to_return(status: 200, body: article_daenemark_html)

      pdf_file = load_fixture 'just-a.pdf'
      stub_request(:get, "http://www.brandeins.de/uploads/tx_b4/030_b1_01_13_prototypen.pdf").
        to_return(body: pdf_file)
      stub_request(:get, "http://www.brandeins.de/uploads/tx_b4/008_b1_01_13_mikrooekonomie.pdf").
        to_return(body: pdf_file)
      # The cover image request is stubbed to fail with status 400.
      stub_request(:get, "http://www.brandeins.de/typo3temp/pics/titel_0113_77be1ece47.jpg").
        to_return(status: 400, body: "")

      kiosk = BrandEins::Kiosk.new
      magazine = kiosk.fetch_magazine(month: 1, year: 2013)
      expect(magazine).to be_a BrandEins::Pages::Magazine
    end
  end
end