natgal-dl 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/natgal-dl +187 -0
- metadata +64 -0
data/bin/natgal-dl
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "open-uri"
|
3
|
+
require "fileutils"
|
4
|
+
require "progressbar"
|
5
|
+
require "cgi"
|
6
|
+
|
7
|
+
# Fail fast at startup when the `convert` executable cannot be found on the
# PATH; the tool shells out to it further down.
imagemagick_available = system("which convert > /dev/null")
raise "ImageMagick is not installed or not in the path." unless imagemagick_available
|
10
|
+
|
11
|
+
# Downloads the tiled, zoomable image embedded in a National Gallery painting
# page and reassembles it into a single JPEG in the current directory.
#
# The page is expected to contain a <dl class="data"> element whose dt/dd
# pairs provide "contentId", "width", "height", "tileSize" and "max".
# Tiles are cached under TEMP_DIR, stitched with ImageMagick's `montage`
# and finally cropped to the real picture size with `convert`.
class NationalGallery
  # Raised when the page carries no <dl class="data"> block to read from.
  NoDataError = Class.new(RuntimeError)

  # Substitution alphabet: digit N of a tile key is replaced by OBFUSCATOR[N].
  OBFUSCATOR = "vRfOdXapKz".scan(/./).freeze
  TILE_URI   = "http://www.nationalgallery.org.uk/custom/ng/tile.php?id=%s".freeze
  USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)".freeze
  TEMP_DIR   = "/tmp/natgal-tiles".freeze

  attr_reader :page_uri

  # page_uri - String URL of the painting's page on the gallery website.
  def initialize(page_uri)
    @page_uri = page_uri
  end

  # Runs the whole pipeline for this painting: prints the title to stderr,
  # downloads every tile, stitches them into a temporary MIFF file, crops
  # that to the picture's dimensions as "<title>.jpg" and removes the MIFF.
  #
  # Raises NoDataError when the page has no data block.
  def grab
    $stderr.puts title
    fetch_tiles
    tiles = prepared_tile_paths
    stitched_image = "%04d.miff" % content_id # MIFF is native to ImageMagick
    stitch_tiles(tiles, stitched_image)
    crop(stitched_image, filename)
    FileUtils.rm(stitched_image)
  end

  private

  # Output file name: the title with every run of non-alphanumeric
  # characters collapsed to a single "-", plus ".jpg".
  def filename
    title.gsub(/[^a-z0-9]+/i, "-") + ".jpg"
  end

  # "WxH" geometry string describing one full-size square tile.
  def tile_geometry
    "%dx%d" % [tile_size, tile_size]
  end

  # Assembles the tile files (row-major order) into one image at destination.
  def stitch_tiles(files, destination)
    tiling = "%dx%d" % [tiles_across, tiles_down]
    report "Stitching" do
      system(*(["montage", "-geometry", tile_geometry, "-tile", tiling] + files + [destination]))
    end
  end

  # Crops source to the picture's dimensions, anchored at the top-left
  # corner, and writes the result to destination.
  def crop(source, destination)
    final_size = "%dx%d+0+0" % [width.ceil, height.ceil]
    report "Cropping" do
      system("convert", "-crop", final_size, source, destination)
    end
  end

  # Yields the x, y grid position of every tile, row by row, while driving a
  # progress bar titled with label.
  def each_tile(label)
    progress = ProgressBar.new(label, tiles_across * tiles_down)
    (0 ... tiles_down).each do |y|
      (0 ... tiles_across).each do |x|
        yield x, y
        progress.inc
      end
    end
    progress.finish
  end

  # Downloads every tile that is not already cached on disk.
  def fetch_tiles
    each_tile("Fetching") { |x, y| fetch_tile(x, y) }
  end

  # Returns the tile file paths in row-major order. Tiles in the last column
  # or last row are replaced by a copy composited onto a white full-size
  # canvas, so every file handed to montage has the same dimensions.
  def prepared_tile_paths
    files = []
    each_tile("Preparing") do |x, y|
      files << (edge_tile?(x, y) ? padded_tile_path(x, y) : local_tile_path(x, y))
    end
    files
  end

  # True for tiles in the last column or the last row of the grid.
  def edge_tile?(x, y)
    x == (tiles_across - 1) || y == (tiles_down - 1)
  end

  # Path of the white-padded copy of tile x, y, creating it if it is missing.
  def padded_tile_path(x, y)
    modified_path = local_tile_path(x, y, "modified")
    unless File.exist?(modified_path)
      system "convert", "-size", tile_geometry, "xc:white", modified_path
      system "composite", "-geometry", "+0+0", local_tile_path(x, y), modified_path, modified_path
    end
    modified_path
  end

  # Cache path for tile x, y; suffix (if given) is appended after a "-".
  def local_tile_path(x, y, suffix=nil)
    File.join(TEMP_DIR, "%04d-%02d-%02d%s.jpg" % [content_id, y, x, suffix ? "-" + suffix : ""])
  end

  # Downloads tile x, y into the cache unless a file is already there.
  def fetch_tile(x, y)
    path = local_tile_path(x, y)
    return if File.exist?(path)
    FileUtils.mkdir_p(File.dirname(path))
    data = get(tile_uri(x, y))
    File.open(path, "wb") do |f|
      f << data
    end
  end

  # URL of tile x, y: a numeric key built from row, content id, column and
  # zoom level, with each digit mapped through OBFUSCATOR.
  def tile_uri(x, y)
    key = "%02d0%04d%02d0%d" % [y, content_id, x, zoom]
    obfuscated = key.gsub(/\d/){ |m| OBFUSCATOR[m.to_i] }
    TILE_URI % obfuscated
  end

  # Hash of the dt => dd pairs found in the page's <dl class="data"> block.
  # All-digit values become Integers, decimal values become Floats, anything
  # else stays a String. The result is memoized.
  #
  # Raises NoDataError when the block is absent.
  def data
    return @data if @data
    dl = page_source[%r{<dl class="data".*?</dl>}]
    raise NoDataError unless dl
    entries = []
    dl.scan(%r{<d[td]>([^<]*)</d[td]>}) do |m|
      value = m[0]
      case value
      when /^-?\d+$/
        entries << value.to_i
      when /^-?\d+\.\d+$/
        entries << value.to_f
      else
        entries << value
      end
    end
    @data = Hash[*entries]
  end

  def zoom
    data["max"]
  end

  def content_id
    data["contentId"]
  end

  def width
    data["width"]
  end

  def height
    data["height"]
  end

  def tile_size
    data["tileSize"]
  end

  # Number of tile columns. Divides as Float: width and tile_size are
  # Integers whenever the page gives whole numbers, and integer division
  # would truncate before ceil, dropping the last partial column.
  def tiles_across
    (width.to_f / tile_size).ceil
  end

  # Number of tile rows; see tiles_across for why the division is Float.
  def tiles_down
    (height.to_f / tile_size).ceil
  end

  # Text of the page's first <h1>, with HTML entities decoded.
  def title
    CGI.unescapeHTML(page_source[%r{<h1[^>]*>([^<]+)</h1>}, 1])
  end

  # Body of the painting page, fetched once and memoized.
  def page_source
    @page_source ||= get(page_uri)
  end

  # Fetches uri and returns the response body, sending the browser-like
  # User-Agent and the painting page as Referer.
  #
  # Goes through URI#open (provided by open-uri) instead of Kernel#open:
  # Kernel#open would execute a command for an argument starting with "|",
  # and it no longer opens URLs on Ruby 3.0 and later.
  #
  # Raises ArgumentError when uri has a scheme open-uri cannot fetch.
  def get(uri)
    remote = URI.parse(uri)
    raise ArgumentError, "Cannot fetch #{uri}" unless remote.respond_to?(:open)
    remote.open("User-Agent" => USER_AGENT, "Referer" => page_uri){ |f| f.read }
  end

  # Prints "<message> ... " to stderr, runs the block, then prints "done".
  def report(message)
    $stderr.print message, " ... "
    $stderr.flush
    yield
    $stderr.puts "done"
  end
end
|
176
|
+
|
177
|
+
# Show a usage hint when no gallery page URIs were given.
puts "Usage: #{$0} gallery_uri [gallery_uri [...]]" if ARGV.empty?

# Handle each URI in turn; the rescue sits inside the loop so a page without
# tile data is reported and the remaining URIs are still processed.
ARGV.each do |gallery_uri|
  begin
    NationalGallery.new(gallery_uri).grab
  rescue NationalGallery::NoDataError
    $stderr.puts "This painting does not appear to be available at high resolution."
  end
end
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: natgal-dl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 1.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Paul Battley
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-15 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
type: :runtime
|
16
|
+
name: progressbar
|
17
|
+
prerelease: false
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
none: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
requirements:
|
26
|
+
- - ! '>='
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
version: '0'
|
29
|
+
none: false
|
30
|
+
description: Download high-resolution images of paintings in the National Gallery
|
31
|
+
collection.
|
32
|
+
email:
|
33
|
+
- pbattley@gmail.com
|
34
|
+
executables:
|
35
|
+
- natgal-dl
|
36
|
+
extensions: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
files:
|
39
|
+
- bin/natgal-dl
|
40
|
+
homepage: https://github.com/threedaymonk/natgal-dl
|
41
|
+
licenses: []
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ! '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
none: false
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
none: false
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.8.23
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: National Gallery picture downloader
|
64
|
+
test_files: []
|