boston-stitch 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/boston-stitch +94 -0
- metadata +69 -0
data/bin/boston-stitch
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'yaml'
|
4
|
+
require 'sinatra'
|
5
|
+
require 'timeout'
|
6
|
+
|
7
|
+
# Gallery title shared across the script: `process` fills it in from the
# first fetched page that has a <title>, and it is later copied into $pages.
$title = nil
|
8
|
+
|
9
|
+
# Downloads one gallery page with curl and extracts the pieces needed to
# stitch the gallery together.
#
# Side effect: while the global $title is still unset, the page's <title>
# text (if any) is stored there and echoed to stderr.
#
# @param url [String] address of the gallery page to fetch
# @return [Hash] with keys :photo, :url, :caption_html, :caption_text,
#   :iframe_html and :next. Every value except :url is nil when the matching
#   element is absent from the page (including when the download yields
#   nothing), so a sparse page no longer raises NoMethodError.
def process(url)
  # The argument-vector form of popen never goes through a shell, so quotes
  # or other metacharacters in a scraped href cannot inject commands. The
  # "--" keeps curl from reading a URL that starts with "-" as an option.
  html = IO.popen(['curl', '-s', '--', url], &:read)
  doc = Nokogiri::HTML(html)

  unless $title
    title_node = doc.at('head title')
    if title_node
      $title = title_node.inner_text
      $stderr.puts "Title: #$title"
    end
  end

  caption_node = doc.at('#photoCaption .tt')
  nextlink = doc.at('#photoCaption .next a')
  img = doc.at('.Photo img')
  photo_url = img && img[:src]

  # The iframe is only looked up when the page offers no photo.
  iframe = doc.at('.Photo iframe') unless photo_url

  # Follow the link only when its label actually says "next".
  follow = nextlink && nextlink.inner_text =~ /next/i
  nexturl = follow ? nextlink[:href] : nil

  { photo: photo_url,
    url: url,
    caption_html: caption_node && caption_node.inner_html,
    caption_text: caption_node && caption_node.inner_text,
    iframe_html: iframe && iframe.to_html,
    next: nexturl }
end
|
35
|
+
|
36
|
+
# Script entry point: crawl the gallery starting at the URL given on the
# command line, following each page's :next link, and publish the result in
# the global $pages.
start_url = ARGV.first

# \A anchors at the very start of the argument (^ would also match after an
# embedded newline).
if start_url.nil? || start_url !~ /\Ahttp/
  $stderr.puts "The first argument must be a URL to a Boston.com gallery page."
  exit 1
end

pages = [process(start_url)]
next_url = pages.last[:next]

while next_url
  $stderr.puts "Fetching #{next_url}"
  begin
    page = Timeout.timeout(5) { process(next_url) }
  rescue Timeout::Error
    # The link to any later page lives on the page that just timed out, so
    # there is nothing to skip ahead to. Stop here instead of re-requesting
    # the same URL forever.
    $stderr.puts "Timed out. Stopping with the pages fetched so far."
    break
  end
  pages << page
  next_url = page[:next]
end

# Shape consumed by the web view: the gallery title plus one hash per page.
$pages = { title: $title, pages: pages }
|
60
|
+
|
61
|
+
# Small Sinatra app that renders every collected gallery page in one HTML
# table: caption and source link on the left, photo (or iframe) on the right.
#
# The data comes from the global $pages, which must already be populated
# because `run!` at the bottom of the class body starts the server as soon
# as the class is defined.
class Browser < Sinatra::Base
  # Inline Haml template. The heredoc body sits at column 0 because Haml
  # nesting is indentation-sensitive. Caption and iframe markup are emitted
  # with `!=` so they render as HTML whatever the installed Haml's
  # escape_html default is.
  # NOTE(review): this is scraped, untrusted markup being rendered verbatim.
  template :index do
    <<END
%html
  %head
    %title= @pages[:title]
    %style
      td { vertical-align: middle; padding: 10px; border-top: 1px dotted #CCC }
      td:first-child { font-size: 16px; font-family: Arial; width: 25% }
  %body
    %h2= @pages[:title]
    %table
      - @pages[:pages].each do |page|
        %tr
          %td
            != page[:caption_html]
            %br
            %p{style: 'font-size: 10px'}
              %a{href: page[:url]} source page
          %td
            - if page[:photo]
              %img{src: page[:photo]}
            - elsif page[:iframe_html]
              != page[:iframe_html]
END
  end

  # Single route: expose the crawl result to the template and render it.
  get '/' do
    @pages = $pages
    haml :index
  end

  run!
end
|
94
|
+
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: boston-stitch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Daniel Choi
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-28 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &16998780 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *16998780
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: sinatra
|
27
|
+
requirement: &16997740 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *16997740
|
36
|
+
description: Stitch together all the pages of a Boston.com gallery
|
37
|
+
email:
|
38
|
+
- dhchoi@gmail.com
|
39
|
+
executables:
|
40
|
+
- boston-stitch
|
41
|
+
extensions: []
|
42
|
+
extra_rdoc_files: []
|
43
|
+
files:
|
44
|
+
- bin/boston-stitch
|
45
|
+
homepage: http://github.com/danchoi/boston-stitch
|
46
|
+
licenses: []
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 1.9.0
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
requirements: []
|
64
|
+
rubyforge_project: boston-stitch
|
65
|
+
rubygems_version: 1.8.10
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Stitch together all the pages of a Boston.com gallery
|
69
|
+
test_files: []
|