natgal-dl 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/bin/natgal-dl +187 -0
  2. metadata +64 -0
data/bin/natgal-dl ADDED
@@ -0,0 +1,187 @@
1
+ #!/usr/bin/env ruby
2
+ require "open-uri"
3
+ require "fileutils"
4
+ require "progressbar"
5
+ require "cgi"
6
+
7
# Fail fast when any ImageMagick command-line tool this script shells out to
# is missing. `convert`, `montage` and `composite` are all invoked later, so
# each one is probed rather than just `convert`.
%w[convert montage composite].each do |tool|
  # Both output streams are silenced; only the exit status matters here.
  next if system("which #{tool} > /dev/null 2>&1")

  raise "ImageMagick is not installed or not in the path."
end
10
+
11
# Downloads the tiled, zoomable image of a painting from a National Gallery
# page and reassembles it into a single JPEG using ImageMagick.
#
# Usage:
#   NationalGallery.new(page_uri).grab
#
# The page is expected to contain an <h1> title and a <dl class="data"> block
# whose <dt>/<dd> pairs provide "contentId", "width", "height", "tileSize" and
# "max" (the zoom level used when requesting tiles).
class NationalGallery
  # Raised when the page lacks the title or tile metadata needed to proceed.
  NoDataError = Class.new(RuntimeError)

  # Substitution table for tile keys: digit n is replaced by OBFUSCATOR[n].
  OBFUSCATOR = "vRfOdXapKz".scan(/./).freeze
  TILE_URI   = "http://www.nationalgallery.org.uk/custom/ng/tile.php?id=%s".freeze
  USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)".freeze
  # Downloaded tiles are cached here and reused on later runs.
  TEMP_DIR   = "/tmp/natgal-tiles".freeze

  attr_reader :page_uri

  # page_uri - String URL of the painting's page.
  def initialize(page_uri)
    @page_uri = page_uri
  end

  # Fetches every tile, pads the edge tiles, stitches them together and crops
  # the result to the painting's real size. The JPEG is written to the current
  # directory under a name derived from the page title.
  #
  # Raises NoDataError when the page has no title or no tile metadata, and
  # RuntimeError when an ImageMagick command fails.
  def grab
    $stderr.puts title
    fetch_tiles
    tiles = prepared_tile_paths
    stitched_image = "%04d.miff" % content_id # MIFF is native to ImageMagick
    begin
      stitch_tiles(tiles, stitched_image)
      crop(stitched_image, filename)
    ensure
      # Remove the intermediate file even when stitching or cropping failed.
      FileUtils.rm_f(stitched_image)
    end
  end

  private

  # Output file name: the title with every run of non-alphanumerics collapsed
  # to a single "-", plus ".jpg".
  def filename
    title.gsub(/[^a-z0-9]+/i, "-") + ".jpg"
  end

  # Joins the tile files (given in row-major order) into one image.
  def stitch_tiles(files, destination)
    tiling = "%dx%d" % [tiles_across, tiles_down]
    report "Stitching" do
      run_command(*(["montage", "-geometry", tile_geometry, "-tile", tiling] + files + [destination]))
    end
  end

  # Trims the white padding added to edge tiles, leaving width x height pixels.
  def crop(source, destination)
    final_size = "%dx%d+0+0" % [width.ceil, height.ceil]
    report "Cropping" do
      run_command("convert", "-crop", final_size, source, destination)
    end
  end

  # Downloads every tile that is not already cached, with a progress bar.
  def fetch_tiles
    progress = ProgressBar.new("Fetching", tiles_across * tiles_down)
    each_tile do |x, y|
      fetch_tile(x, y)
      progress.inc
    end
    progress.finish
  end

  # Returns the tile paths in row-major order, substituting a padded copy for
  # each tile in the last row or last column.
  def prepared_tile_paths
    progress = ProgressBar.new("Preparing", tiles_across * tiles_down)
    files = []
    each_tile do |x, y|
      files << (edge_tile?(x, y) ? padded_tile_path(x, y) : local_tile_path(x, y))
      progress.inc
    end
    progress.finish
    files
  end

  # Yields the (x, y) coordinates of every tile, row by row.
  def each_tile
    (0...tiles_down).each do |y|
      (0...tiles_across).each do |x|
        yield x, y
      end
    end
  end

  # True for tiles in the last column or the last row.
  def edge_tile?(x, y)
    x == (tiles_across - 1) || y == (tiles_down - 1)
  end

  # Composites the tile at (x, y) onto a white tile_size x tile_size canvas,
  # reusing the result if it already exists. Returns the padded file's path.
  def padded_tile_path(x, y)
    padded = local_tile_path(x, y, "modified")
    return padded if File.exist?(padded)

    begin
      run_command("convert", "-size", tile_geometry, "xc:white", padded)
      run_command("composite", "-geometry", "+0+0", local_tile_path(x, y), padded, padded)
    rescue
      # A half-built canvas must not be mistaken for a finished tile next run.
      FileUtils.rm_f(padded)
      raise
    end
    padded
  end

  # "<tile_size>x<tile_size>", the geometry argument shared by montage/convert.
  def tile_geometry
    "%dx%d" % [tile_size, tile_size]
  end

  # Cache path of the tile at (x, y); suffix, when given, is appended as
  # "-<suffix>" before the extension.
  def local_tile_path(x, y, suffix=nil)
    File.join(TEMP_DIR, "%04d-%02d-%02d%s.jpg" % [content_id, y, x, suffix ? "-" + suffix : ""])
  end

  # Downloads one tile into the cache unless it is already there.
  def fetch_tile(x, y)
    path = local_tile_path(x, y)
    return if File.exist?(path)

    FileUtils.mkdir_p(File.dirname(path))
    data = get(tile_uri(x, y))
    File.open(path, "wb") do |f|
      f << data
    end
  end

  # URL of the tile at (x, y). The key is row, content id, column and zoom
  # level separated by literal "0"s, with each digit then mapped through
  # OBFUSCATOR.
  def tile_uri(x, y)
    key = "%02d0%04d%02d0%d" % [y, content_id, x, zoom]
    obfuscated = key.gsub(/\d/) { |m| OBFUSCATOR[m.to_i] }
    TILE_URI % obfuscated
  end

  # Metadata Hash parsed (once) from the page's <dl class="data"> element:
  # consecutive <dt>/<dd> texts become key/value pairs.
  #
  # Raises NoDataError when the element is absent.
  def data
    return @data if @data

    dl = page_source[%r{<dl class="data".*?</dl>}]
    raise NoDataError unless dl

    entries = dl.scan(%r{<d[td]>([^<]*)</d[td]>}).map { |match| coerce(match.first) }
    @data = Hash[*entries]
  end

  # Converts integer-looking text to Integer and decimal-looking text to
  # Float; anything else is returned unchanged. \A/\z anchor the whole string
  # so multi-line text is never treated as a number.
  def coerce(text)
    case text
    when /\A-?\d+\z/      then text.to_i
    when /\A-?\d+\.\d+\z/ then text.to_f
    else text
    end
  end

  # Looks up a required metadata entry, raising NoDataError if it is absent.
  def field(name)
    data.fetch(name) { raise NoDataError, "page data has no #{name.inspect} entry" }
  end

  def zoom
    field("max")
  end

  def content_id
    field("contentId")
  end

  def width
    field("width")
  end

  def height
    field("height")
  end

  def tile_size
    field("tileSize")
  end

  # Number of tile columns. fdiv forces floating-point division so that a
  # partial final column is counted even when width is an Integer.
  def tiles_across
    width.fdiv(tile_size).ceil
  end

  # Number of tile rows; see tiles_across for why fdiv is used.
  def tiles_down
    height.fdiv(tile_size).ceil
  end

  # Text of the page's first <h1>, with HTML entities decoded.
  #
  # Raises NoDataError when the page has no <h1>.
  def title
    raw = page_source[%r{<h1[^>]*>([^<]+)</h1>}, 1]
    raise NoDataError, "no <h1> title found on #{page_uri}" unless raw

    CGI.unescapeHTML(raw)
  end

  # HTML of the painting's page, fetched on first use.
  def page_source
    @page_source ||= get(page_uri)
  end

  # Fetches uri, sending the browser-like User-Agent and the painting page as
  # Referer, and returns the response body.
  #
  # The URI object's own open (provided by open-uri) is used instead of
  # Kernel#open: Kernel#open no longer handles URLs on Ruby 3.0+, and would
  # run a string starting with "|" as a shell command.
  #
  # Raises ArgumentError when uri is not of a kind open-uri can fetch.
  def get(uri)
    target = URI.parse(uri)
    unless target.respond_to?(:open)
      raise ArgumentError, "Unsupported URI (expected an http, https or ftp URL): #{uri}"
    end

    target.open("User-Agent" => USER_AGENT, "Referer" => page_uri) { |f| f.read }
  end

  # Runs an external command without a shell; raises RuntimeError if it could
  # not be started or exited unsuccessfully.
  def run_command(*command)
    return if system(*command)

    raise "Command failed: #{command.first}"
  end

  # Prints "<message> ... " to stderr, runs the block, then prints "done".
  def report(message)
    $stderr.print message, " ... "
    $stderr.flush
    yield
    $stderr.puts "done"
  end
end
176
+
177
# Command-line driver: every argument is treated as the URI of a painting
# page and downloaded in turn.

# Downloads one painting; a page without high-resolution data is reported on
# stderr and does not stop the remaining downloads.
def download_painting(uri)
  NationalGallery.new(uri).grab
rescue NationalGallery::NoDataError
  $stderr.puts "This painting does not appear to be available at high resolution."
end

puts "Usage: #{$0} gallery_uri [gallery_uri [...]]" if ARGV.empty?

ARGV.each { |uri| download_painting(uri) }
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: natgal-dl
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Paul Battley
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-15 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ type: :runtime
16
+ name: progressbar
17
+ prerelease: false
18
+ requirement: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ none: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ! '>='
27
+ - !ruby/object:Gem::Version
28
+ version: '0'
29
+ none: false
30
+ description: Download high-resolution images of paintings in the National Gallery
31
+ collection.
32
+ email:
33
+ - pbattley@gmail.com
34
+ executables:
35
+ - natgal-dl
36
+ extensions: []
37
+ extra_rdoc_files: []
38
+ files:
39
+ - bin/natgal-dl
40
+ homepage: https://github.com/threedaymonk/natgal-dl
41
+ licenses: []
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ none: false
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ none: false
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.23
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: National Gallery picture downloader
64
+ test_files: []