murlsh 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +5 -3
- data/VERSION +1 -1
- data/config.yaml +1 -1
- data/lib/murlsh/atom_feed.rb +13 -14
- data/lib/murlsh/doc.rb +34 -0
- data/lib/murlsh/failproof.rb +15 -0
- data/lib/murlsh/uri.rb +12 -0
- data/lib/murlsh/uri_ask.rb +84 -0
- data/lib/murlsh/url.rb +4 -4
- data/lib/murlsh/url_body.rb +4 -10
- data/lib/murlsh.rb +4 -3
- data/murlsh.gemspec +12 -15
- data/plugins/lookup_content_type_title.rb +3 -2
- data/public/css/jquery.jgrowl.css +8 -7
- data/public/css/screen.css +10 -2
- data/public/js/jquery-1.4.min.js +151 -0
- data/public/js/jquery.jgrowl_compressed.js +38 -18
- data/public/js/js.js +222 -203
- data/test/atom_feed_test.rb +101 -0
- data/test/uri_ask_test.rb +100 -0
- metadata +11 -14
- data/lib/murlsh/get_content_type.rb +0 -92
- data/lib/murlsh/get_title.rb +0 -72
- data/lib/murlsh/referrer.rb +0 -50
- data/public/js/jquery-1.3.2.min.js +0 -19
- data/test/get_charset_test.rb +0 -25
- data/test/get_content_type_test.rb +0 -63
- data/test/get_title_test.rb +0 -43
- data/test/referrer_test.rb +0 -71
data/Rakefile
CHANGED
@@ -21,7 +21,8 @@ config = YAML.load_file('config.yaml')
|
|
21
21
|
|
22
22
|
desc "Test remote content type fetch for a URL and show errors."
|
23
23
|
task :content_type, :url do |t, args|
|
24
|
-
puts
|
24
|
+
puts URI(args.url).extend(Murlsh::UriAsk).content_type(:failproof => false,
|
25
|
+
:debug => STDOUT)
|
25
26
|
end
|
26
27
|
|
27
28
|
namespace :db do
|
@@ -101,7 +102,8 @@ end
|
|
101
102
|
|
102
103
|
desc "Test remote title fetch for a URL and show errors."
|
103
104
|
task :title, :url do |t, args|
|
104
|
-
puts
|
105
|
+
puts URI(args.url).extend(Murlsh::UriAsk).title(:failproof => false,
|
106
|
+
:debug => STDOUT)
|
105
107
|
end
|
106
108
|
|
107
109
|
desc 'Try to fetch the title for a url and update it in the database.'
|
@@ -111,7 +113,7 @@ task :title_fetch, :url_id do |t, args|
|
|
111
113
|
url = Murlsh::Url.find(args.url_id)
|
112
114
|
puts "Url: #{url.url}"
|
113
115
|
puts "Previous title: #{url.title}"
|
114
|
-
url.title =
|
116
|
+
url.title = URI(url.url).extend(Murlsh::UriAsk).title(:failproof => false)
|
115
117
|
url.save
|
116
118
|
puts "\nNew title: #{url.title}"
|
117
119
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
data/config.yaml
CHANGED
data/lib/murlsh/atom_feed.rb
CHANGED
@@ -21,18 +21,9 @@ module Murlsh
|
|
21
21
|
@filename = options[:filename]
|
22
22
|
@title = options[:title]
|
23
23
|
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
# Set up fields to use for building item ids.
|
28
|
-
def setup_id_fields
|
29
|
-
uri_parsed = URI(@root_url)
|
30
|
-
|
31
|
-
m = uri_parsed.host.match(/^(.*?)\.?([^.]+\.[^.]+)$/)
|
24
|
+
root_uri = URI(@root_url)
|
32
25
|
|
33
|
-
@host, @domain =
|
34
|
-
|
35
|
-
@path = uri_parsed.path
|
26
|
+
@host, @domain, @path = root_uri.host, root_uri.domain, root_uri.path
|
36
27
|
end
|
37
28
|
|
38
29
|
# Generate the feed and write it to the filesystem with locking.
|
@@ -54,16 +45,16 @@ module Murlsh
|
|
54
45
|
entries.each do |mu|
|
55
46
|
xm.entry {
|
56
47
|
xm.author { xm.name(mu.name) }
|
57
|
-
xm.title(mu.
|
48
|
+
xm.title(mu.title_stripped)
|
58
49
|
xm.id(entry_id(mu))
|
59
|
-
xm.summary(mu.
|
50
|
+
xm.summary(mu.title_stripped)
|
60
51
|
xm.updated(mu.time.xmlschema)
|
61
52
|
xm.link(:href => mu.url)
|
62
53
|
enclosure(xm, mu)
|
54
|
+
via(xm, mu)
|
63
55
|
}
|
64
56
|
end
|
65
57
|
}
|
66
|
-
xm
|
67
58
|
end
|
68
59
|
|
69
60
|
# Build the entry's id.
|
@@ -77,6 +68,14 @@ module Murlsh
|
|
77
68
|
:title => 'Full-size') if mu.is_image?
|
78
69
|
end
|
79
70
|
|
71
|
+
def via(xm, mu)
|
72
|
+
begin
|
73
|
+
xm.link(:rel => 'via', :type => 'text/html', :href => mu.via,
|
74
|
+
:title => URI(mu.via).domain) if mu.via
|
75
|
+
rescue Exception
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
80
79
|
end
|
81
80
|
|
82
81
|
end
|
data/lib/murlsh/doc.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hpricot'
|
3
|
+
|
4
|
+
module Murlsh
|
5
|
+
|
6
|
+
# Hpricot:Doc mixin.
|
7
|
+
module Doc
|
8
|
+
|
9
|
+
# Get the character set of the document.
|
10
|
+
def charset
|
11
|
+
%w{content-type Content-Type}.each do |ct|
|
12
|
+
content_type = at("meta[@http-equiv='#{ct}']")
|
13
|
+
unless content_type.nil?
|
14
|
+
content = content_type['content']
|
15
|
+
unless content.nil?
|
16
|
+
charset = content[/charset=([\w_.:-]+)/, 1]
|
17
|
+
return charset if charset
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
|
24
|
+
# Find the title of the document.
|
25
|
+
def title
|
26
|
+
%w{//html/head/title //head/title //html/title //title}.each do |xpath|
|
27
|
+
return (self/xpath).first.inner_html unless (self/xpath).first.nil?
|
28
|
+
end
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
data/lib/murlsh/uri.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
# Extra methods added to URI class.
|
4
|
+
class URI::Generic
|
5
|
+
|
6
|
+
# Return the domain.
|
7
|
+
def domain; host[/[a-z\d-]+\.[a-z]{2,}(\.[a-z]{2})?$/].downcase; end
|
8
|
+
|
9
|
+
# Return the path and query string.
|
10
|
+
def path_query; path + (query ? "?#{query}" : ''); end
|
11
|
+
|
12
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'net/https'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'hpricot'
|
8
|
+
require 'htmlentities'
|
9
|
+
require 'iconv'
|
10
|
+
|
11
|
+
module Murlsh
|
12
|
+
|
13
|
+
# URI mixin.
|
14
|
+
module UriAsk
|
15
|
+
|
16
|
+
# Get the content type.
|
17
|
+
#
|
18
|
+
# Options:
|
19
|
+
# * :failproof - if true hide all exceptions and return empty string on failure
|
20
|
+
# * :headers - hash of headers to send in request
|
21
|
+
def content_type(options={})
|
22
|
+
return @content_type if defined?(@content_type)
|
23
|
+
options[:headers] = default_headers.merge(options.fetch(:headers, {}))
|
24
|
+
|
25
|
+
@content_type = ''
|
26
|
+
Murlsh::failproof(options) do
|
27
|
+
# try head first to save bandwidth
|
28
|
+
http = Net::HTTP.new(host, port)
|
29
|
+
http.use_ssl = (scheme == 'https')
|
30
|
+
|
31
|
+
resp = http.request_head(path_query, options[:headers])
|
32
|
+
@content_type = case resp
|
33
|
+
when Net::HTTPSuccess then resp['content-type']
|
34
|
+
else self.open(options[:headers]) { |f| f.content_type }
|
35
|
+
end
|
36
|
+
end
|
37
|
+
@content_type
|
38
|
+
end
|
39
|
+
|
40
|
+
# Get the HTML title.
|
41
|
+
#
|
42
|
+
# Options:
|
43
|
+
# * :failproof - if true hide all exceptions and return empty string on failure
|
44
|
+
# * :headers - hash of headers to send in request
|
45
|
+
def title(options={})
|
46
|
+
return @title if defined?(@title)
|
47
|
+
options[:headers] = default_headers.merge(options.fetch(:headers, {}))
|
48
|
+
|
49
|
+
@title = to_s
|
50
|
+
if might_have_title?(options)
|
51
|
+
Murlsh::failproof(options) do
|
52
|
+
self.open(options[:headers]) do |f|
|
53
|
+
doc = Hpricot(f).extend(Murlsh::Doc)
|
54
|
+
|
55
|
+
@title = HTMLEntities.new.decode(Iconv.conv('utf-8',
|
56
|
+
doc.charset || f.charset, doc.title))
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
@title
|
61
|
+
end
|
62
|
+
|
63
|
+
# Default headers sent with the request.
|
64
|
+
def default_headers
|
65
|
+
result = {
|
66
|
+
'User-Agent' =>
|
67
|
+
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
68
|
+
}
|
69
|
+
if (host || '')[/^www\.nytimes\.com/]
|
70
|
+
result['Referer'] = 'http://news.google.com/'
|
71
|
+
end
|
72
|
+
|
73
|
+
result
|
74
|
+
end
|
75
|
+
|
76
|
+
# Return true if the content type is likely to have a title that can be
|
77
|
+
# parsed.
|
78
|
+
def might_have_title?(options={})
|
79
|
+
content_type(options)[/^text\/html/]
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
data/lib/murlsh/url.rb
CHANGED
@@ -3,10 +3,6 @@ require 'active_record'
|
|
3
3
|
|
4
4
|
require 'uri'
|
5
5
|
|
6
|
-
module URI
|
7
|
-
def domain; host[/[a-z\d-]+\.[a-z]{2,}(\.[a-z]{2})?$/].downcase; end
|
8
|
-
end
|
9
|
-
|
10
6
|
module Murlsh
|
11
7
|
|
12
8
|
# URL ActiveRecord.
|
@@ -17,6 +13,10 @@ module Murlsh
|
|
17
13
|
read_attribute(:title) || read_attribute(:url) || 'title missing'
|
18
14
|
end
|
19
15
|
|
16
|
+
# Title with whitespace compressed and leading and trailing whitespace
|
17
|
+
# stripped.
|
18
|
+
def title_stripped; title.strip.gsub(/\s+/, ' '); end
|
19
|
+
|
20
20
|
# Return true if this url has the same author as another url.
|
21
21
|
def same_author?(other)
|
22
22
|
other and other.email and other.name and
|
data/lib/murlsh/url_body.rb
CHANGED
@@ -54,15 +54,14 @@ module Murlsh
|
|
54
54
|
div(mu.name, :class => 'name') if mu.name
|
55
55
|
end
|
56
56
|
|
57
|
-
a(mu.
|
57
|
+
a(mu.title_stripped, :href => mu.url)
|
58
58
|
|
59
59
|
mu.hostrec do |hostrec|
|
60
|
-
|
61
|
-
span(hostrec, :class => 'host')
|
60
|
+
self.sub(" [#{hostrec}]", :class => 'host')
|
62
61
|
end
|
63
62
|
mu.viarec do |via|
|
64
63
|
span(:class => 'via') {
|
65
|
-
text!('
|
64
|
+
text!(' via '); a(via.domain, :href => via)
|
66
65
|
}
|
67
66
|
end
|
68
67
|
span(", #{mu.time.fuzzy}", :class => 'date') if
|
@@ -115,14 +114,9 @@ module Murlsh
|
|
115
114
|
# Search form builder.
|
116
115
|
def search_form
|
117
116
|
form(:action => '', :method => 'get') {
|
118
|
-
value = @q
|
119
|
-
Murlsh::Referrer.new(@req.referrer).search_query do |refq|
|
120
|
-
re_parts = refq.split.collect { |x| Regexp.escape(x) }
|
121
|
-
value = "\\b(#{re_parts.join('|')})\\b"
|
122
|
-
end
|
123
117
|
fieldset {
|
124
118
|
input(:type => 'text', :id => 'q', :name => 'q', :size => 32,
|
125
|
-
:value =>
|
119
|
+
:value => @q)
|
126
120
|
input(:type => 'submit', :value=> 'Regex Search')
|
127
121
|
}
|
128
122
|
}
|
data/lib/murlsh.rb
CHANGED
@@ -1,14 +1,15 @@
|
|
1
|
+
require 'murlsh/doc'
|
2
|
+
require 'murlsh/uri'
|
1
3
|
require 'murlsh/auth'
|
2
4
|
require 'murlsh/dispatch'
|
3
|
-
require 'murlsh/
|
4
|
-
require 'murlsh/get_title'
|
5
|
+
require 'murlsh/failproof'
|
5
6
|
require 'murlsh/openlock'
|
6
7
|
require 'murlsh/plugin'
|
7
|
-
require 'murlsh/referrer'
|
8
8
|
require 'murlsh/sqlite3_adapter'
|
9
9
|
require 'murlsh/time'
|
10
10
|
require 'murlsh/url_server'
|
11
11
|
require 'murlsh/url'
|
12
|
+
require 'murlsh/uri_ask'
|
12
13
|
require 'murlsh/xhtml_response'
|
13
14
|
|
14
15
|
# requiring builder before active_record blows up
|
data/murlsh.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{murlsh}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Matthew M. Boedicker"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-01-15}
|
13
13
|
s.default_executable = %q{murlsh}
|
14
14
|
s.description = %q{url sharing site framework with easy adding, title lookup, atom feed, thumbnails and embedding}
|
15
15
|
s.email = %q{matthewm@boedicker.org}
|
@@ -31,14 +31,15 @@ Gem::Specification.new do |s|
|
|
31
31
|
"lib/murlsh/atom_feed.rb",
|
32
32
|
"lib/murlsh/auth.rb",
|
33
33
|
"lib/murlsh/dispatch.rb",
|
34
|
-
"lib/murlsh/
|
35
|
-
"lib/murlsh/
|
34
|
+
"lib/murlsh/doc.rb",
|
35
|
+
"lib/murlsh/failproof.rb",
|
36
36
|
"lib/murlsh/markup.rb",
|
37
37
|
"lib/murlsh/openlock.rb",
|
38
38
|
"lib/murlsh/plugin.rb",
|
39
|
-
"lib/murlsh/referrer.rb",
|
40
39
|
"lib/murlsh/sqlite3_adapter.rb",
|
41
40
|
"lib/murlsh/time.rb",
|
41
|
+
"lib/murlsh/uri.rb",
|
42
|
+
"lib/murlsh/uri_ask.rb",
|
42
43
|
"lib/murlsh/url.rb",
|
43
44
|
"lib/murlsh/url_body.rb",
|
44
45
|
"lib/murlsh/url_server.rb",
|
@@ -50,17 +51,15 @@ Gem::Specification.new do |s|
|
|
50
51
|
"plugins/update_feed.rb",
|
51
52
|
"public/css/jquery.jgrowl.css",
|
52
53
|
"public/css/screen.css",
|
53
|
-
"public/js/jquery-1.
|
54
|
+
"public/js/jquery-1.4.min.js",
|
54
55
|
"public/js/jquery.cookie.js",
|
55
56
|
"public/js/jquery.jgrowl_compressed.js",
|
56
57
|
"public/js/js.js",
|
57
58
|
"public/swf/player_mp3_mini.swf",
|
59
|
+
"test/atom_feed_test.rb",
|
58
60
|
"test/auth_test.rb",
|
59
|
-
"test/get_charset_test.rb",
|
60
|
-
"test/get_content_type_test.rb",
|
61
|
-
"test/get_title_test.rb",
|
62
61
|
"test/markup_test.rb",
|
63
|
-
"test/
|
62
|
+
"test/uri_ask_test.rb",
|
64
63
|
"test/xhtml_response_test.rb"
|
65
64
|
]
|
66
65
|
s.homepage = %q{http://github.com/mmb/murlsh}
|
@@ -70,12 +69,10 @@ Gem::Specification.new do |s|
|
|
70
69
|
s.summary = %q{url sharing site framework}
|
71
70
|
s.test_files = [
|
72
71
|
"test/xhtml_response_test.rb",
|
72
|
+
"test/uri_ask_test.rb",
|
73
73
|
"test/markup_test.rb",
|
74
|
-
"test/
|
75
|
-
"test/
|
76
|
-
"test/get_content_type_test.rb",
|
77
|
-
"test/auth_test.rb",
|
78
|
-
"test/get_title_test.rb"
|
74
|
+
"test/atom_feed_test.rb",
|
75
|
+
"test/auth_test.rb"
|
79
76
|
]
|
80
77
|
|
81
78
|
if s.respond_to? :specification_version then
|
@@ -8,8 +8,9 @@ module Murlsh
|
|
8
8
|
Hook = 'add_pre'
|
9
9
|
|
10
10
|
def self.run(url, config)
|
11
|
-
|
12
|
-
url.
|
11
|
+
ask = URI(url.url).extend(Murlsh::UriAsk)
|
12
|
+
url.content_type = ask.content_type
|
13
|
+
url.title = ask.title
|
13
14
|
end
|
14
15
|
|
15
16
|
end
|
@@ -2,6 +2,8 @@
|
|
2
2
|
div.jGrowl {
|
3
3
|
padding: 10px;
|
4
4
|
z-index: 9999;
|
5
|
+
color: #fff;
|
6
|
+
font-size: 12px;
|
5
7
|
}
|
6
8
|
|
7
9
|
/** Special IE6 Style Positioning **/
|
@@ -76,16 +78,16 @@ div.center div.jGrowl-notification, div.center div.jGrowl-closer {
|
|
76
78
|
|
77
79
|
div.jGrowl div.jGrowl-notification, div.jGrowl div.jGrowl-closer {
|
78
80
|
background-color: #000;
|
79
|
-
color: #fff;
|
80
81
|
opacity: .85;
|
81
|
-
|
82
|
+
-ms-filter: "progid:DXImageTransform.Microsoft.Alpha(Opacity=85)";
|
83
|
+
filter: progid:DXImageTransform.Microsoft.Alpha(Opacity=85);
|
82
84
|
zoom: 1;
|
83
85
|
width: 235px;
|
84
86
|
padding: 10px;
|
85
87
|
margin-top: 5px;
|
86
88
|
margin-bottom: 5px;
|
87
89
|
font-family: Tahoma, Arial, Helvetica, sans-serif;
|
88
|
-
font-size:
|
90
|
+
font-size: 1em;
|
89
91
|
text-align: left;
|
90
92
|
display: none;
|
91
93
|
-moz-border-radius: 5px;
|
@@ -98,23 +100,22 @@ div.jGrowl div.jGrowl-notification {
|
|
98
100
|
|
99
101
|
div.jGrowl div.jGrowl-notification div.header {
|
100
102
|
font-weight: bold;
|
101
|
-
font-size:
|
103
|
+
font-size: .85em;
|
102
104
|
}
|
103
105
|
|
104
106
|
div.jGrowl div.jGrowl-notification div.close {
|
105
107
|
z-index: 99;
|
106
108
|
float: right;
|
107
109
|
font-weight: bold;
|
108
|
-
font-size:
|
110
|
+
font-size: 1em;
|
109
111
|
cursor: pointer;
|
110
112
|
}
|
111
113
|
|
112
114
|
div.jGrowl div.jGrowl-closer {
|
113
|
-
height: 15px;
|
114
115
|
padding-top: 4px;
|
115
116
|
padding-bottom: 4px;
|
116
117
|
cursor: pointer;
|
117
|
-
font-size:
|
118
|
+
font-size: .9em;
|
118
119
|
font-weight: bold;
|
119
120
|
text-align: center;
|
120
121
|
}
|
data/public/css/screen.css
CHANGED
@@ -51,11 +51,19 @@ img.thumb, li object {
|
|
51
51
|
margin-right : 10px;
|
52
52
|
}
|
53
53
|
|
54
|
-
|
54
|
+
sub.host {
|
55
55
|
color : #808080;
|
56
56
|
font-family : monospace;
|
57
57
|
}
|
58
58
|
|
59
|
+
span.via {
|
60
|
+
font-size : 0.75em;
|
61
|
+
}
|
62
|
+
|
63
|
+
span.via:before {
|
64
|
+
content : " \2190 \ ";
|
65
|
+
}
|
66
|
+
|
59
67
|
fieldset {
|
60
68
|
border : 0;
|
61
69
|
margin : 0;
|
@@ -121,7 +129,7 @@ div.jGrowl div.jGrowl-closer {
|
|
121
129
|
width : 130px;
|
122
130
|
}
|
123
131
|
|
124
|
-
input#url {
|
132
|
+
input#url, input#via {
|
125
133
|
width : 160px;
|
126
134
|
}
|
127
135
|
|