natgal-dl 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/bin/natgal-dl +187 -0
  2. metadata +64 -0
data/bin/natgal-dl ADDED
@@ -0,0 +1,187 @@
1
+ #!/usr/bin/env ruby
2
+ require "open-uri"
3
+ require "fileutils"
4
+ require "progressbar"
5
+ require "cgi"
6
+
7
# Refuse to start when ImageMagick's `convert` binary cannot be located via
# `which`; the downloader shells out to ImageMagick for all image work.
imagemagick_found = system("which convert > /dev/null")
raise "ImageMagick is not installed or not in the path." unless imagemagick_found
10
+
11
# Downloads the image tiles referenced by a National Gallery painting page and
# assembles them into a single JPEG by shelling out to ImageMagick
# (`montage`, `convert` and `composite`).
#
# Usage:
#   NationalGallery.new(page_uri).grab
class NationalGallery
  # Raised when the page contains no <dl class="data"> element, i.e. there is
  # no tile information to work from.
  NoDataError = Class.new(RuntimeError)

  # Letter substituted for each decimal digit (index 0..9) of a tile key.
  OBFUSCATOR = "vRfOdXapKz".chars.freeze
  # Tile endpoint; %s receives the obfuscated tile key.
  TILE_URI = "http://www.nationalgallery.org.uk/custom/ng/tile.php?id=%s".freeze
  # User-Agent header sent with every HTTP request.
  USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)".freeze
  # Directory in which downloaded (and padded) tiles are cached between runs.
  TEMP_DIR = "/tmp/natgal-tiles".freeze

  attr_reader :page_uri

  # @param page_uri [String] URI of the painting's page
  def initialize(page_uri)
    @page_uri = page_uri
  end

  # Fetches every tile, stitches them together and crops the result to the
  # painting's real dimensions. The output file is named after the page title
  # and written to the current directory.
  #
  # @raise [NoDataError] if the page carries no tile data
  # @raise [RuntimeError] if an ImageMagick command fails
  def grab
    $stderr.puts title
    fetch_tiles
    tiles = prepared_tile_paths
    stitched_image = "%04d.miff" % content_id # MIFF is native to ImageMagick
    begin
      stitch_tiles(tiles, stitched_image)
      crop(stitched_image, filename)
    ensure
      # Always discard the intermediate file, even when a step above failed.
      FileUtils.rm_f(stitched_image)
    end
  end

  private

  # Output file name: the title with each run of non-alphanumerics turned
  # into a single "-", plus a ".jpg" extension.
  def filename
    title.gsub(/[^a-z0-9]+/i, "-") + ".jpg"
  end

  # Lays the tiles out on a tiles_across x tiles_down grid with `montage`.
  def stitch_tiles(files, destination)
    tiling = "%dx%d" % [tiles_across, tiles_down]
    report "Stitching" do
      run("montage", "-geometry", tile_geometry, "-tile", tiling, *files, destination)
    end
  end

  # Crops the stitched grid down to the painting's width and height.
  def crop(source, destination)
    final_size = "%dx%d+0+0" % [width.ceil, height.ceil]
    report "Cropping" do
      run("convert", "-crop", final_size, source, destination)
    end
  end

  # Downloads every tile that is not already cached, with a progress bar.
  def fetch_tiles
    progress = ProgressBar.new("Fetching", tiles_across * tiles_down)
    each_tile do |x, y|
      fetch_tile(x, y)
      progress.inc
    end
    progress.finish
  end

  # Returns the tile paths in row-major order. Tiles in the last column or
  # last row are replaced by copies composited onto a white tile-sized canvas.
  def prepared_tile_paths
    files = []
    progress = ProgressBar.new("Preparing", tiles_across * tiles_down)
    each_tile do |x, y|
      on_edge = x == (tiles_across - 1) || y == (tiles_down - 1)
      files << (on_edge ? padded_tile_path(x, y) : local_tile_path(x, y))
      progress.inc
    end
    progress.finish
    files
  end

  # Builds (unless already present) the padded copy of an edge tile and
  # returns its path.
  def padded_tile_path(x, y)
    source = local_tile_path(x, y)
    padded = local_tile_path(x, y, "modified")
    return padded if File.exist?(padded)

    begin
      run "convert", "-size", tile_geometry, "xc:white", padded
      run "composite", "-geometry", "+0+0", source, padded, padded
    rescue StandardError
      # Do not leave a blank canvas behind: it would be reused on the next run.
      FileUtils.rm_f(padded)
      raise
    end
    padded
  end

  # Yields the x, y grid position of every tile, row by row.
  def each_tile
    (0...tiles_down).each do |y|
      (0...tiles_across).each do |x|
        yield x, y
      end
    end
  end

  # "WxH" geometry string describing one square tile.
  def tile_geometry
    "%dx%d" % [tile_size, tile_size]
  end

  # Cache path for the tile at (x, y); +suffix+ distinguishes derived files.
  def local_tile_path(x, y, suffix = nil)
    File.join(TEMP_DIR, "%04d-%02d-%02d%s.jpg" % [content_id, y, x, suffix ? "-" + suffix : ""])
  end

  # Downloads one tile into the cache unless a file for it already exists.
  def fetch_tile(x, y)
    path = local_tile_path(x, y)
    return if File.exist?(path)

    FileUtils.mkdir_p(File.dirname(path))
    body = get(tile_uri(x, y))
    File.open(path, "wb") { |f| f << body }
  end

  # Builds the tile URI: a numeric key made from y, content id, x and zoom,
  # with every digit replaced by its OBFUSCATOR letter.
  def tile_uri(x, y)
    key = "%02d0%04d%02d0%d" % [y, content_id, x, zoom]
    obfuscated = key.gsub(/\d/) { |digit| OBFUSCATOR[digit.to_i] }
    TILE_URI % obfuscated
  end

  # Parses the page's <dl class="data"> element into a Hash, pairing each
  # consecutive <dt>/<dd> text. Integer- and decimal-looking values are
  # converted to Integer and Float; everything else stays a String.
  #
  # @raise [NoDataError] if the element is missing
  def data
    return @data if @data

    dl = page_source[%r{<dl class="data".*?</dl>}]
    raise NoDataError unless dl

    entries = dl.scan(%r{<d[td]>([^<]*)</d[td]>}).flatten.map { |text| coerce(text) }
    @data = entries.each_slice(2).to_h
  end

  # Converts a numeric-looking string to a number. Anchored to the whole
  # string so that a multi-line value is never mistaken for a number.
  def coerce(value)
    case value
    when /\A-?\d+\Z/      then value.to_i
    when /\A-?\d+\.\d+\Z/ then value.to_f
    else value
    end
  end

  def zoom
    data["max"]
  end

  def content_id
    data["contentId"]
  end

  def width
    data["width"]
  end

  def height
    data["height"]
  end

  def tile_size
    data["tileSize"]
  end

  # Number of tile columns. Divides as Float so that Integer dimensions are
  # rounded up rather than truncated before the ceil.
  def tiles_across
    (width / tile_size.to_f).ceil
  end

  # Number of tile rows; see #tiles_across.
  def tiles_down
    (height / tile_size.to_f).ceil
  end

  # Text of the page's first <h1>, with HTML entities decoded.
  def title
    CGI.unescapeHTML(page_source[%r{<h1[^>]*>([^<]+)</h1>}, 1])
  end

  # Body of the painting page, downloaded once and memoized.
  def page_source
    @page_source ||= get(page_uri)
  end

  # Performs a GET with the spoofed User-Agent and the page as Referer.
  # Uses URI.open: open-uri no longer hooks Kernel#open on Ruby 3.0+, and
  # Kernel#open would also treat a "|command" string as a pipe.
  def get(uri)
    URI.open(uri, "User-Agent" => USER_AGENT, "Referer" => page_uri) { |f| f.read }
  end

  # Runs an external command, raising when it cannot be started or exits
  # with a non-zero status.
  def run(*command)
    return if system(*command)

    raise "Command failed: #{command.join(' ')}"
  end

  # Prints "<message> ... " to stderr, runs the block, then prints "done".
  def report(message)
    $stderr.print message, " ... "
    $stderr.flush
    yield
    $stderr.puts "done"
  end
end
176
+
177
# Command-line entry point: print a usage line when no URIs were given, then
# download each painting named on the command line in turn.
puts "Usage: #{$0} gallery_uri [gallery_uri [...]]" if ARGV.empty?

ARGV.each do |uri|
  gallery = NationalGallery.new(uri)
  begin
    gallery.grab
  rescue NationalGallery::NoDataError
    # The page had no tile data block; report it and move on to the next URI.
    $stderr.puts "This painting does not appear to be available at high resolution."
  end
end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: natgal-dl
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Paul Battley
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-15 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ type: :runtime
16
+ name: progressbar
17
+ prerelease: false
18
+ requirement: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ none: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ! '>='
27
+ - !ruby/object:Gem::Version
28
+ version: '0'
29
+ none: false
30
+ description: Download high-resolution images of paintings in the National Gallery
31
+ collection.
32
+ email:
33
+ - pbattley@gmail.com
34
+ executables:
35
+ - natgal-dl
36
+ extensions: []
37
+ extra_rdoc_files: []
38
+ files:
39
+ - bin/natgal-dl
40
+ homepage: https://github.com/threedaymonk/natgal-dl
41
+ licenses: []
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ none: false
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ none: false
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.23
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: National Gallery picture downloader
64
+ test_files: []