murlsh 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +5 -3
- data/VERSION +1 -1
- data/config.yaml +1 -1
- data/lib/murlsh/atom_feed.rb +13 -14
- data/lib/murlsh/doc.rb +34 -0
- data/lib/murlsh/failproof.rb +15 -0
- data/lib/murlsh/uri.rb +12 -0
- data/lib/murlsh/uri_ask.rb +84 -0
- data/lib/murlsh/url.rb +4 -4
- data/lib/murlsh/url_body.rb +4 -10
- data/lib/murlsh.rb +4 -3
- data/murlsh.gemspec +12 -15
- data/plugins/lookup_content_type_title.rb +3 -2
- data/public/css/jquery.jgrowl.css +8 -7
- data/public/css/screen.css +10 -2
- data/public/js/jquery-1.4.min.js +151 -0
- data/public/js/jquery.jgrowl_compressed.js +38 -18
- data/public/js/js.js +222 -203
- data/test/atom_feed_test.rb +101 -0
- data/test/uri_ask_test.rb +100 -0
- metadata +11 -14
- data/lib/murlsh/get_content_type.rb +0 -92
- data/lib/murlsh/get_title.rb +0 -72
- data/lib/murlsh/referrer.rb +0 -50
- data/public/js/jquery-1.3.2.min.js +0 -19
- data/test/get_charset_test.rb +0 -25
- data/test/get_content_type_test.rb +0 -63
- data/test/get_title_test.rb +0 -43
- data/test/referrer_test.rb +0 -71
data/Rakefile
CHANGED
@@ -21,7 +21,8 @@ config = YAML.load_file('config.yaml')
|
|
21
21
|
|
22
22
|
desc "Test remote content type fetch for a URL and show errors."
|
23
23
|
task :content_type, :url do |t, args|
|
24
|
-
puts
|
24
|
+
puts URI(args.url).extend(Murlsh::UriAsk).content_type(:failproof => false,
|
25
|
+
:debug => STDOUT)
|
25
26
|
end
|
26
27
|
|
27
28
|
namespace :db do
|
@@ -101,7 +102,8 @@ end
|
|
101
102
|
|
102
103
|
desc "Test remote title fetch for a URL and show errors."
|
103
104
|
task :title, :url do |t, args|
|
104
|
-
puts
|
105
|
+
puts URI(args.url).extend(Murlsh::UriAsk).title(:failproof => false,
|
106
|
+
:debug => STDOUT)
|
105
107
|
end
|
106
108
|
|
107
109
|
desc 'Try to fetch the title for a url and update it in the database.'
|
@@ -111,7 +113,7 @@ task :title_fetch, :url_id do |t, args|
|
|
111
113
|
url = Murlsh::Url.find(args.url_id)
|
112
114
|
puts "Url: #{url.url}"
|
113
115
|
puts "Previous title: #{url.title}"
|
114
|
-
url.title =
|
116
|
+
url.title = URI(url.url).extend(Murlsh::UriAsk).title(:failproof => false)
|
115
117
|
url.save
|
116
118
|
puts "\nNew title: #{url.title}"
|
117
119
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
data/config.yaml
CHANGED
data/lib/murlsh/atom_feed.rb
CHANGED
@@ -21,18 +21,9 @@ module Murlsh
|
|
21
21
|
@filename = options[:filename]
|
22
22
|
@title = options[:title]
|
23
23
|
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
# Set up fields to use for building item ids.
|
28
|
-
def setup_id_fields
|
29
|
-
uri_parsed = URI(@root_url)
|
30
|
-
|
31
|
-
m = uri_parsed.host.match(/^(.*?)\.?([^.]+\.[^.]+)$/)
|
24
|
+
root_uri = URI(@root_url)
|
32
25
|
|
33
|
-
@host, @domain =
|
34
|
-
|
35
|
-
@path = uri_parsed.path
|
26
|
+
@host, @domain, @path = root_uri.host, root_uri.domain, root_uri.path
|
36
27
|
end
|
37
28
|
|
38
29
|
# Generate the feed and write it to the filesystem with locking.
|
@@ -54,16 +45,16 @@ module Murlsh
|
|
54
45
|
entries.each do |mu|
|
55
46
|
xm.entry {
|
56
47
|
xm.author { xm.name(mu.name) }
|
57
|
-
xm.title(mu.
|
48
|
+
xm.title(mu.title_stripped)
|
58
49
|
xm.id(entry_id(mu))
|
59
|
-
xm.summary(mu.
|
50
|
+
xm.summary(mu.title_stripped)
|
60
51
|
xm.updated(mu.time.xmlschema)
|
61
52
|
xm.link(:href => mu.url)
|
62
53
|
enclosure(xm, mu)
|
54
|
+
via(xm, mu)
|
63
55
|
}
|
64
56
|
end
|
65
57
|
}
|
66
|
-
xm
|
67
58
|
end
|
68
59
|
|
69
60
|
# Build the entry's id.
|
@@ -77,6 +68,14 @@ module Murlsh
|
|
77
68
|
:title => 'Full-size') if mu.is_image?
|
78
69
|
end
|
79
70
|
|
71
|
+
def via(xm, mu)
|
72
|
+
begin
|
73
|
+
xm.link(:rel => 'via', :type => 'text/html', :href => mu.via,
|
74
|
+
:title => URI(mu.via).domain) if mu.via
|
75
|
+
rescue Exception
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
80
79
|
end
|
81
80
|
|
82
81
|
end
|
data/lib/murlsh/doc.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hpricot'
|
3
|
+
|
4
|
+
module Murlsh
|
5
|
+
|
6
|
+
# Hpricot:Doc mixin.
|
7
|
+
module Doc
|
8
|
+
|
9
|
+
# Get the character set of the document.
|
10
|
+
def charset
|
11
|
+
%w{content-type Content-Type}.each do |ct|
|
12
|
+
content_type = at("meta[@http-equiv='#{ct}']")
|
13
|
+
unless content_type.nil?
|
14
|
+
content = content_type['content']
|
15
|
+
unless content.nil?
|
16
|
+
charset = content[/charset=([\w_.:-]+)/, 1]
|
17
|
+
return charset if charset
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
|
24
|
+
# Find the title of the document.
|
25
|
+
def title
|
26
|
+
%w{//html/head/title //head/title //html/title //title}.each do |xpath|
|
27
|
+
return (self/xpath).first.inner_html unless (self/xpath).first.nil?
|
28
|
+
end
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
data/lib/murlsh/uri.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
# Extra methods added to URI class.
|
4
|
+
class URI::Generic
|
5
|
+
|
6
|
+
# Return the domain.
|
7
|
+
def domain; host[/[a-z\d-]+\.[a-z]{2,}(\.[a-z]{2})?$/].downcase; end
|
8
|
+
|
9
|
+
# Return the path and query string.
|
10
|
+
def path_query; path + (query ? "?#{query}" : ''); end
|
11
|
+
|
12
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'net/https'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'hpricot'
|
8
|
+
require 'htmlentities'
|
9
|
+
require 'iconv'
|
10
|
+
|
11
|
+
module Murlsh
|
12
|
+
|
13
|
+
# URI mixin.
|
14
|
+
module UriAsk
|
15
|
+
|
16
|
+
# Get the content type.
|
17
|
+
#
|
18
|
+
# Options:
|
19
|
+
# * :failproof - if true hide all exceptions and return empty string on failure
|
20
|
+
# * :headers - hash of headers to send in request
|
21
|
+
def content_type(options={})
|
22
|
+
return @content_type if defined?(@content_type)
|
23
|
+
options[:headers] = default_headers.merge(options.fetch(:headers, {}))
|
24
|
+
|
25
|
+
@content_type = ''
|
26
|
+
Murlsh::failproof(options) do
|
27
|
+
# try head first to save bandwidth
|
28
|
+
http = Net::HTTP.new(host, port)
|
29
|
+
http.use_ssl = (scheme == 'https')
|
30
|
+
|
31
|
+
resp = http.request_head(path_query, options[:headers])
|
32
|
+
@content_type = case resp
|
33
|
+
when Net::HTTPSuccess then resp['content-type']
|
34
|
+
else self.open(options[:headers]) { |f| f.content_type }
|
35
|
+
end
|
36
|
+
end
|
37
|
+
@content_type
|
38
|
+
end
|
39
|
+
|
40
|
+
# Get the HTML title.
|
41
|
+
#
|
42
|
+
# Options:
|
43
|
+
# * :failproof - if true hide all exceptions and return empty string on failure
|
44
|
+
# * :headers - hash of headers to send in request
|
45
|
+
def title(options={})
|
46
|
+
return @title if defined?(@title)
|
47
|
+
options[:headers] = default_headers.merge(options.fetch(:headers, {}))
|
48
|
+
|
49
|
+
@title = to_s
|
50
|
+
if might_have_title?(options)
|
51
|
+
Murlsh::failproof(options) do
|
52
|
+
self.open(options[:headers]) do |f|
|
53
|
+
doc = Hpricot(f).extend(Murlsh::Doc)
|
54
|
+
|
55
|
+
@title = HTMLEntities.new.decode(Iconv.conv('utf-8',
|
56
|
+
doc.charset || f.charset, doc.title))
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
@title
|
61
|
+
end
|
62
|
+
|
63
|
+
# Default headers sent with the request.
|
64
|
+
def default_headers
|
65
|
+
result = {
|
66
|
+
'User-Agent' =>
|
67
|
+
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
68
|
+
}
|
69
|
+
if (host || '')[/^www\.nytimes\.com/]
|
70
|
+
result['Referer'] = 'http://news.google.com/'
|
71
|
+
end
|
72
|
+
|
73
|
+
result
|
74
|
+
end
|
75
|
+
|
76
|
+
# Return true if the content type is likely to have a title that can be
|
77
|
+
# parsed.
|
78
|
+
def might_have_title?(options={})
|
79
|
+
content_type(options)[/^text\/html/]
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
data/lib/murlsh/url.rb
CHANGED
@@ -3,10 +3,6 @@ require 'active_record'
|
|
3
3
|
|
4
4
|
require 'uri'
|
5
5
|
|
6
|
-
module URI
|
7
|
-
def domain; host[/[a-z\d-]+\.[a-z]{2,}(\.[a-z]{2})?$/].downcase; end
|
8
|
-
end
|
9
|
-
|
10
6
|
module Murlsh
|
11
7
|
|
12
8
|
# URL ActiveRecord.
|
@@ -17,6 +13,10 @@ module Murlsh
|
|
17
13
|
read_attribute(:title) || read_attribute(:url) || 'title missing'
|
18
14
|
end
|
19
15
|
|
16
|
+
# Title with whitespace compressed and leading and trailing whitespace
|
17
|
+
# stripped.
|
18
|
+
def title_stripped; title.strip.gsub(/\s+/, ' '); end
|
19
|
+
|
20
20
|
# Return true if this url has the same author as another url.
|
21
21
|
def same_author?(other)
|
22
22
|
other and other.email and other.name and
|
data/lib/murlsh/url_body.rb
CHANGED
@@ -54,15 +54,14 @@ module Murlsh
|
|
54
54
|
div(mu.name, :class => 'name') if mu.name
|
55
55
|
end
|
56
56
|
|
57
|
-
a(mu.
|
57
|
+
a(mu.title_stripped, :href => mu.url)
|
58
58
|
|
59
59
|
mu.hostrec do |hostrec|
|
60
|
-
|
61
|
-
span(hostrec, :class => 'host')
|
60
|
+
self.sub(" [#{hostrec}]", :class => 'host')
|
62
61
|
end
|
63
62
|
mu.viarec do |via|
|
64
63
|
span(:class => 'via') {
|
65
|
-
text!('
|
64
|
+
text!(' via '); a(via.domain, :href => via)
|
66
65
|
}
|
67
66
|
end
|
68
67
|
span(", #{mu.time.fuzzy}", :class => 'date') if
|
@@ -115,14 +114,9 @@ module Murlsh
|
|
115
114
|
# Search form builder.
|
116
115
|
def search_form
|
117
116
|
form(:action => '', :method => 'get') {
|
118
|
-
value = @q
|
119
|
-
Murlsh::Referrer.new(@req.referrer).search_query do |refq|
|
120
|
-
re_parts = refq.split.collect { |x| Regexp.escape(x) }
|
121
|
-
value = "\\b(#{re_parts.join('|')})\\b"
|
122
|
-
end
|
123
117
|
fieldset {
|
124
118
|
input(:type => 'text', :id => 'q', :name => 'q', :size => 32,
|
125
|
-
:value =>
|
119
|
+
:value => @q)
|
126
120
|
input(:type => 'submit', :value=> 'Regex Search')
|
127
121
|
}
|
128
122
|
}
|
data/lib/murlsh.rb
CHANGED
@@ -1,14 +1,15 @@
|
|
1
|
+
require 'murlsh/doc'
|
2
|
+
require 'murlsh/uri'
|
1
3
|
require 'murlsh/auth'
|
2
4
|
require 'murlsh/dispatch'
|
3
|
-
require 'murlsh/
|
4
|
-
require 'murlsh/get_title'
|
5
|
+
require 'murlsh/failproof'
|
5
6
|
require 'murlsh/openlock'
|
6
7
|
require 'murlsh/plugin'
|
7
|
-
require 'murlsh/referrer'
|
8
8
|
require 'murlsh/sqlite3_adapter'
|
9
9
|
require 'murlsh/time'
|
10
10
|
require 'murlsh/url_server'
|
11
11
|
require 'murlsh/url'
|
12
|
+
require 'murlsh/uri_ask'
|
12
13
|
require 'murlsh/xhtml_response'
|
13
14
|
|
14
15
|
# requiring builder before active_record blows up
|
data/murlsh.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{murlsh}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Matthew M. Boedicker"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-01-15}
|
13
13
|
s.default_executable = %q{murlsh}
|
14
14
|
s.description = %q{url sharing site framework with easy adding, title lookup, atom feed, thumbnails and embedding}
|
15
15
|
s.email = %q{matthewm@boedicker.org}
|
@@ -31,14 +31,15 @@ Gem::Specification.new do |s|
|
|
31
31
|
"lib/murlsh/atom_feed.rb",
|
32
32
|
"lib/murlsh/auth.rb",
|
33
33
|
"lib/murlsh/dispatch.rb",
|
34
|
-
"lib/murlsh/
|
35
|
-
"lib/murlsh/
|
34
|
+
"lib/murlsh/doc.rb",
|
35
|
+
"lib/murlsh/failproof.rb",
|
36
36
|
"lib/murlsh/markup.rb",
|
37
37
|
"lib/murlsh/openlock.rb",
|
38
38
|
"lib/murlsh/plugin.rb",
|
39
|
-
"lib/murlsh/referrer.rb",
|
40
39
|
"lib/murlsh/sqlite3_adapter.rb",
|
41
40
|
"lib/murlsh/time.rb",
|
41
|
+
"lib/murlsh/uri.rb",
|
42
|
+
"lib/murlsh/uri_ask.rb",
|
42
43
|
"lib/murlsh/url.rb",
|
43
44
|
"lib/murlsh/url_body.rb",
|
44
45
|
"lib/murlsh/url_server.rb",
|
@@ -50,17 +51,15 @@ Gem::Specification.new do |s|
|
|
50
51
|
"plugins/update_feed.rb",
|
51
52
|
"public/css/jquery.jgrowl.css",
|
52
53
|
"public/css/screen.css",
|
53
|
-
"public/js/jquery-1.
|
54
|
+
"public/js/jquery-1.4.min.js",
|
54
55
|
"public/js/jquery.cookie.js",
|
55
56
|
"public/js/jquery.jgrowl_compressed.js",
|
56
57
|
"public/js/js.js",
|
57
58
|
"public/swf/player_mp3_mini.swf",
|
59
|
+
"test/atom_feed_test.rb",
|
58
60
|
"test/auth_test.rb",
|
59
|
-
"test/get_charset_test.rb",
|
60
|
-
"test/get_content_type_test.rb",
|
61
|
-
"test/get_title_test.rb",
|
62
61
|
"test/markup_test.rb",
|
63
|
-
"test/
|
62
|
+
"test/uri_ask_test.rb",
|
64
63
|
"test/xhtml_response_test.rb"
|
65
64
|
]
|
66
65
|
s.homepage = %q{http://github.com/mmb/murlsh}
|
@@ -70,12 +69,10 @@ Gem::Specification.new do |s|
|
|
70
69
|
s.summary = %q{url sharing site framework}
|
71
70
|
s.test_files = [
|
72
71
|
"test/xhtml_response_test.rb",
|
72
|
+
"test/uri_ask_test.rb",
|
73
73
|
"test/markup_test.rb",
|
74
|
-
"test/
|
75
|
-
"test/
|
76
|
-
"test/get_content_type_test.rb",
|
77
|
-
"test/auth_test.rb",
|
78
|
-
"test/get_title_test.rb"
|
74
|
+
"test/atom_feed_test.rb",
|
75
|
+
"test/auth_test.rb"
|
79
76
|
]
|
80
77
|
|
81
78
|
if s.respond_to? :specification_version then
|
@@ -8,8 +8,9 @@ module Murlsh
|
|
8
8
|
Hook = 'add_pre'
|
9
9
|
|
10
10
|
def self.run(url, config)
|
11
|
-
|
12
|
-
url.
|
11
|
+
ask = URI(url.url).extend(Murlsh::UriAsk)
|
12
|
+
url.content_type = ask.content_type
|
13
|
+
url.title = ask.title
|
13
14
|
end
|
14
15
|
|
15
16
|
end
|
@@ -2,6 +2,8 @@
|
|
2
2
|
div.jGrowl {
|
3
3
|
padding: 10px;
|
4
4
|
z-index: 9999;
|
5
|
+
color: #fff;
|
6
|
+
font-size: 12px;
|
5
7
|
}
|
6
8
|
|
7
9
|
/** Special IE6 Style Positioning **/
|
@@ -76,16 +78,16 @@ div.center div.jGrowl-notification, div.center div.jGrowl-closer {
|
|
76
78
|
|
77
79
|
div.jGrowl div.jGrowl-notification, div.jGrowl div.jGrowl-closer {
|
78
80
|
background-color: #000;
|
79
|
-
color: #fff;
|
80
81
|
opacity: .85;
|
81
|
-
|
82
|
+
-ms-filter: "progid:DXImageTransform.Microsoft.Alpha(Opacity=85)";
|
83
|
+
filter: progid:DXImageTransform.Microsoft.Alpha(Opacity=85);
|
82
84
|
zoom: 1;
|
83
85
|
width: 235px;
|
84
86
|
padding: 10px;
|
85
87
|
margin-top: 5px;
|
86
88
|
margin-bottom: 5px;
|
87
89
|
font-family: Tahoma, Arial, Helvetica, sans-serif;
|
88
|
-
font-size:
|
90
|
+
font-size: 1em;
|
89
91
|
text-align: left;
|
90
92
|
display: none;
|
91
93
|
-moz-border-radius: 5px;
|
@@ -98,23 +100,22 @@ div.jGrowl div.jGrowl-notification {
|
|
98
100
|
|
99
101
|
div.jGrowl div.jGrowl-notification div.header {
|
100
102
|
font-weight: bold;
|
101
|
-
font-size:
|
103
|
+
font-size: .85em;
|
102
104
|
}
|
103
105
|
|
104
106
|
div.jGrowl div.jGrowl-notification div.close {
|
105
107
|
z-index: 99;
|
106
108
|
float: right;
|
107
109
|
font-weight: bold;
|
108
|
-
font-size:
|
110
|
+
font-size: 1em;
|
109
111
|
cursor: pointer;
|
110
112
|
}
|
111
113
|
|
112
114
|
div.jGrowl div.jGrowl-closer {
|
113
|
-
height: 15px;
|
114
115
|
padding-top: 4px;
|
115
116
|
padding-bottom: 4px;
|
116
117
|
cursor: pointer;
|
117
|
-
font-size:
|
118
|
+
font-size: .9em;
|
118
119
|
font-weight: bold;
|
119
120
|
text-align: center;
|
120
121
|
}
|
data/public/css/screen.css
CHANGED
@@ -51,11 +51,19 @@ img.thumb, li object {
|
|
51
51
|
margin-right : 10px;
|
52
52
|
}
|
53
53
|
|
54
|
-
|
54
|
+
sub.host {
|
55
55
|
color : #808080;
|
56
56
|
font-family : monospace;
|
57
57
|
}
|
58
58
|
|
59
|
+
span.via {
|
60
|
+
font-size : 0.75em;
|
61
|
+
}
|
62
|
+
|
63
|
+
span.via:before {
|
64
|
+
content : " \2190 \ ";
|
65
|
+
}
|
66
|
+
|
59
67
|
fieldset {
|
60
68
|
border : 0;
|
61
69
|
margin : 0;
|
@@ -121,7 +129,7 @@ div.jGrowl div.jGrowl-closer {
|
|
121
129
|
width : 130px;
|
122
130
|
}
|
123
131
|
|
124
|
-
input#url {
|
132
|
+
input#url, input#via {
|
125
133
|
width : 160px;
|
126
134
|
}
|
127
135
|
|