natgal-dl 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/natgal-dl +187 -0
- metadata +64 -0
data/bin/natgal-dl
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "open-uri"
|
3
|
+
require "fileutils"
|
4
|
+
require "progressbar"
|
5
|
+
require "cgi"
|
6
|
+
|
7
|
+
# Fail fast at load time: the rest of the script shells out to ImageMagick
# command-line tools, so refuse to start when `which` cannot locate `convert`.
convert_found = system("which convert > /dev/null")
raise "ImageMagick is not installed or not in the path." unless convert_found
|
10
|
+
|
11
|
+
# Downloads the image tiles referenced by a National Gallery painting page and
# assembles them into one JPEG by shelling out to ImageMagick (`montage`,
# `convert`, `composite`).
#
# Typical use:
#
#   NationalGallery.new(page_uri).grab
class NationalGallery
  # Raised when the page lacks the markup this class scrapes (the
  # <dl class="data"> block or the <h1> title).
  NoDataError = Class.new(RuntimeError)

  # Substitution table: digit n of a tile key is replaced by OBFUSCATOR[n].
  OBFUSCATOR = "vRfOdXapKz".scan(/./).freeze
  # Template for tile URLs; %s receives the obfuscated tile key.
  TILE_URI = "http://www.nationalgallery.org.uk/custom/ng/tile.php?id=%s".freeze
  # User-Agent header sent with every HTTP request.
  USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)".freeze
  # Directory in which downloaded (and padded) tiles are cached.
  TEMP_DIR = "/tmp/natgal-tiles".freeze

  attr_reader :page_uri

  # page_uri - String URL of the painting's page.
  def initialize(page_uri)
    @page_uri = page_uri
  end

  # Prints the painting title to stderr, downloads all tiles, stitches them
  # into a temporary MIFF image in the current directory and crops that to the
  # final JPEG named after the title.
  #
  # Raises NoDataError when the page has no tile metadata or no title.
  def grab
    $stderr.puts title
    fetch_tiles
    tiles = prepared_tile_paths
    stitched_image = "%04d.miff" % content_id # MIFF is native to ImageMagick
    begin
      stitch_tiles(tiles, stitched_image)
      crop(stitched_image, filename)
    ensure
      # Always remove the intermediate image, even when cropping raised;
      # rm_f tolerates the file never having been created.
      FileUtils.rm_f(stitched_image)
    end
  end

  private

  # Output file name: the title with every run of non-alphanumeric characters
  # collapsed to "-", plus ".jpg".
  def filename
    title.gsub(/[^a-z0-9]+/i, "-") + ".jpg"
  end

  # "<tile_size>x<tile_size>" geometry string passed to ImageMagick.
  def tile_geometry
    "%dx%d" % [tile_size, tile_size]
  end

  # All [x, y] tile coordinates, row by row (y outer, x inner).
  def tile_positions
    (0...tiles_down).flat_map do |y|
      (0...tiles_across).map { |x| [x, y] }
    end
  end

  # Runs `montage` to lay out +files+ in a tiles_across x tiles_down grid and
  # write the result to +destination+.
  def stitch_tiles(files, destination)
    tiling = "%dx%d" % [tiles_across, tiles_down]
    command = ["montage", "-geometry", tile_geometry, "-tile", tiling] + files + [destination]
    report("Stitching") { system(*command) }
  end

  # Runs `convert -crop` to cut +source+ down to the painting's real
  # dimensions (rounded up), anchored at the top-left corner.
  def crop(source, destination)
    final_size = "%dx%d+0+0" % [width.ceil, height.ceil]
    report("Cropping") { system("convert", "-crop", final_size, source, destination) }
  end

  # Downloads every tile, advancing a progress bar once per tile.
  def fetch_tiles
    progress = ProgressBar.new("Fetching", tiles_across * tiles_down)
    tile_positions.each do |x, y|
      fetch_tile(x, y)
      progress.inc
    end
    progress.finish
  end

  # Returns the local paths of all tiles in row-major order. Tiles in the
  # last column or last row are replaced by a copy composited onto a white
  # tile_size x tile_size canvas (presumably because edge tiles may be
  # smaller than a full tile, which would upset the montage grid).
  def prepared_tile_paths
    progress = ProgressBar.new("Preparing", tiles_across * tiles_down)
    files = tile_positions.map do |x, y|
      on_edge = x == (tiles_across - 1) || y == (tiles_down - 1)
      path = on_edge ? padded_tile_path(x, y) : local_tile_path(x, y)
      progress.inc
      path
    end
    progress.finish
    files
  end

  # Creates (unless already cached on disk) the white-padded copy of tile
  # (x, y) and returns its path.
  def padded_tile_path(x, y)
    modified_path = local_tile_path(x, y, "modified")
    unless File.exist?(modified_path)
      system "convert", "-size", tile_geometry, "xc:white", modified_path
      system "composite", "-geometry", "+0+0", local_tile_path(x, y), modified_path, modified_path
    end
    modified_path
  end

  # Cache path for tile (x, y): TEMP_DIR/<content id>-<y>-<x>[-suffix].jpg
  def local_tile_path(x, y, suffix=nil)
    File.join(TEMP_DIR, "%04d-%02d-%02d%s.jpg" % [content_id, y, x, suffix ? "-" + suffix : ""])
  end

  # Downloads tile (x, y) into the cache unless the file is already there.
  def fetch_tile(x, y)
    path = local_tile_path(x, y)
    return if File.exist?(path)

    FileUtils.mkdir_p(File.dirname(path))
    # Download first so that a failed request leaves no empty file behind.
    tile = get(tile_uri(x, y))
    File.open(path, "wb") { |f| f << tile }
  end

  # URL of tile (x, y): the numeric key built from y, content id, x and zoom
  # has each digit replaced through OBFUSCATOR.
  def tile_uri(x, y)
    key = "%02d0%04d%02d0%d" % [y, content_id, x, zoom]
    obfuscated = key.gsub(/\d/) { |digit| OBFUSCATOR[digit.to_i] }
    TILE_URI % obfuscated
  end

  # Hash of the <dt>/<dd> pairs found in the page's <dl class="data"> block.
  # Values that look like integers or decimals are converted to Integer or
  # Float; everything else stays a String. The result is memoized.
  #
  # Raises NoDataError when the page has no such block.
  def data
    return @data if @data

    dl = page_source[%r{<dl class="data".*?</dl>}]
    raise NoDataError unless dl

    entries = dl.scan(%r{<d[td]>([^<]*)</d[td]>}).map do |(value)|
      case value
      when /^-?\d+$/ then value.to_i
      when /^-?\d+\.\d+$/ then value.to_f
      else value
      end
    end
    @data = Hash[*entries]
  end

  # The page's "max" entry; used as the last component of tile keys.
  def zoom
    data["max"]
  end

  # The page's "contentId" entry; identifies the painting in tile keys and
  # in cache file names.
  def content_id
    data["contentId"]
  end

  # The page's "width" entry; the final image is cropped to this width.
  def width
    data["width"]
  end

  # The page's "height" entry; the final image is cropped to this height.
  def height
    data["height"]
  end

  # The page's "tileSize" entry; edge length used for every tile.
  def tile_size
    data["tileSize"]
  end

  # Number of tile columns. Divides as Float so that an Integer width is not
  # truncated before rounding up (1000 / 256 must give 4 columns, not 3).
  def tiles_across
    (width.to_f / tile_size).ceil
  end

  # Number of tile rows; see tiles_across for why the division is in Float.
  def tiles_down
    (height.to_f / tile_size).ceil
  end

  # Text of the page's first <h1>, with HTML entities decoded. Memoized.
  #
  # Raises NoDataError when the page has no <h1>, instead of failing inside
  # CGI.unescapeHTML with an unrelated error.
  def title
    @title ||= begin
      raw = page_source[%r{<h1[^>]*>([^<]+)</h1>}, 1]
      raise NoDataError, "no <h1> title found in page" unless raw

      CGI.unescapeHTML(raw)
    end
  end

  # Body of the painting page, fetched once and cached.
  def page_source
    @page_source ||= get(page_uri)
  end

  # Fetches +uri+ and returns the response body, sending the fixed User-Agent
  # and the painting page as Referer.
  #
  # Goes through URI#open (added by open-uri) rather than Kernel#open:
  # open-uri no longer patches Kernel#open on Ruby 3.0+, and Kernel#open
  # would treat a string starting with "|" as a command to run. A string
  # that does not parse to an openable URI (e.g. a local path) now raises
  # instead of being opened as a file.
  def get(uri)
    URI.parse(uri).open("User-Agent" => USER_AGENT, "Referer" => page_uri) { |f| f.read }
  end

  # Prints "<message> ... " to stderr, runs the block, then prints "done".
  def report(message)
    $stderr.print message, " ... "
    $stderr.flush
    yield
    $stderr.puts "done"
  end
end
|
176
|
+
|
177
|
+
# Command-line driver: show a usage line when no arguments were given, then
# download each gallery page named on the command line in turn. A page
# without high-resolution data is reported on stderr and skipped.
puts "Usage: #{$0} gallery_uri [gallery_uri [...]]" if ARGV.empty?

ARGV.each do |gallery_uri|
  gallery = NationalGallery.new(gallery_uri)
  begin
    gallery.grab
  rescue NationalGallery::NoDataError
    $stderr.puts "This painting does not appear to be available at high resolution."
  end
end
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: natgal-dl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 1.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Paul Battley
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-15 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
type: :runtime
|
16
|
+
name: progressbar
|
17
|
+
prerelease: false
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
none: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
requirements:
|
26
|
+
- - ! '>='
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
version: '0'
|
29
|
+
none: false
|
30
|
+
description: Download high-resolution images of paintings in the National Gallery
|
31
|
+
collection.
|
32
|
+
email:
|
33
|
+
- pbattley@gmail.com
|
34
|
+
executables:
|
35
|
+
- natgal-dl
|
36
|
+
extensions: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
files:
|
39
|
+
- bin/natgal-dl
|
40
|
+
homepage: https://github.com/threedaymonk/natgal-dl
|
41
|
+
licenses: []
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ! '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
none: false
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
none: false
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.8.23
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: National Gallery picture downloader
|
64
|
+
test_files: []
|