murlsh 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -21,7 +21,8 @@ config = YAML.load_file('config.yaml')
21
21
 
22
22
  desc "Test remote content type fetch for a URL and show errors."
23
23
  task :content_type, :url do |t, args|
24
- puts Murlsh.get_content_type(args.url, :failproof => false, :debug => STDOUT)
24
+ puts URI(args.url).extend(Murlsh::UriAsk).content_type(:failproof => false,
25
+ :debug => STDOUT)
25
26
  end
26
27
 
27
28
  namespace :db do
@@ -101,7 +102,8 @@ end
101
102
 
102
103
  desc "Test remote title fetch for a URL and show errors."
103
104
  task :title, :url do |t, args|
104
- puts Murlsh.get_title(args.url, :failproof => false, :debug => STDOUT)
105
+ puts URI(args.url).extend(Murlsh::UriAsk).title(:failproof => false,
106
+ :debug => STDOUT)
105
107
  end
106
108
 
107
109
  desc 'Try to fetch the title for a url and update it in the database.'
@@ -111,7 +113,7 @@ task :title_fetch, :url_id do |t, args|
111
113
  url = Murlsh::Url.find(args.url_id)
112
114
  puts "Url: #{url.url}"
113
115
  puts "Previous title: #{url.title}"
114
- url.title = Murlsh.get_title(url.url, :failproof => false)
116
+ url.title = URI(url.url).extend(Murlsh::UriAsk).title(:failproof => false)
115
117
  url.save
116
118
  puts "\nNew title: #{url.title}"
117
119
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.5.0
data/config.yaml CHANGED
@@ -13,7 +13,7 @@ css_files:
13
13
  - css/jquery.jgrowl.css
14
14
  - css/screen.css
15
15
  js_files:
16
- - js/jquery-1.3.2.min.js
16
+ - js/jquery-1.4.min.js
17
17
  - js/jquery.cookie.js
18
18
  - js/jquery.jgrowl_compressed.js
19
19
  - js/js.js
@@ -21,18 +21,9 @@ module Murlsh
21
21
  @filename = options[:filename]
22
22
  @title = options[:title]
23
23
 
24
- setup_id_fields
25
- end
26
-
27
- # Set up fields to use for building item ids.
28
- def setup_id_fields
29
- uri_parsed = URI(@root_url)
30
-
31
- m = uri_parsed.host.match(/^(.*?)\.?([^.]+\.[^.]+)$/)
24
+ root_uri = URI(@root_url)
32
25
 
33
- @host, @domain = (m ? m.captures : [uri_parsed.host, ''])
34
-
35
- @path = uri_parsed.path
26
+ @host, @domain, @path = root_uri.host, root_uri.domain, root_uri.path
36
27
  end
37
28
 
38
29
  # Generate the feed and write it to the filesystem with locking.
@@ -54,16 +45,16 @@ module Murlsh
54
45
  entries.each do |mu|
55
46
  xm.entry {
56
47
  xm.author { xm.name(mu.name) }
57
- xm.title(mu.title)
48
+ xm.title(mu.title_stripped)
58
49
  xm.id(entry_id(mu))
59
- xm.summary(mu.title)
50
+ xm.summary(mu.title_stripped)
60
51
  xm.updated(mu.time.xmlschema)
61
52
  xm.link(:href => mu.url)
62
53
  enclosure(xm, mu)
54
+ via(xm, mu)
63
55
  }
64
56
  end
65
57
  }
66
- xm
67
58
  end
68
59
 
69
60
  # Build the entry's id.
@@ -77,6 +68,14 @@ module Murlsh
77
68
  :title => 'Full-size') if mu.is_image?
78
69
  end
79
70
 
71
+ def via(xm, mu)
72
+ begin
73
+ xm.link(:rel => 'via', :type => 'text/html', :href => mu.via,
74
+ :title => URI(mu.via).domain) if mu.via
75
+ rescue Exception
76
+ end
77
+ end
78
+
80
79
  end
81
80
 
82
81
  end
data/lib/murlsh/doc.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+
4
+ module Murlsh
5
+
6
+ # Hpricot:Doc mixin.
7
+ module Doc
8
+
9
+ # Get the character set of the document.
10
+ def charset
11
+ %w{content-type Content-Type}.each do |ct|
12
+ content_type = at("meta[@http-equiv='#{ct}']")
13
+ unless content_type.nil?
14
+ content = content_type['content']
15
+ unless content.nil?
16
+ charset = content[/charset=([\w_.:-]+)/, 1]
17
+ return charset if charset
18
+ end
19
+ end
20
+ end
21
+ nil
22
+ end
23
+
24
+ # Find the title of the document.
25
+ def title
26
+ %w{//html/head/title //head/title //html/title //title}.each do |xpath|
27
+ return (self/xpath).first.inner_html unless (self/xpath).first.nil?
28
+ end
29
+ nil
30
+ end
31
+
32
+ end
33
+
34
+ end
@@ -0,0 +1,15 @@
1
+
2
+ module Murlsh
3
+
4
+ module_function
5
+
6
+ # Catch all exceptions unless options[:failproof] = false.
7
+ def failproof(options={})
8
+ begin
9
+ yield
10
+ rescue Exception
11
+ raise unless options.fetch(:failproof, true)
12
+ end
13
+ end
14
+
15
+ end
data/lib/murlsh/uri.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'uri'
2
+
3
+ # Extra methods added to URI class.
4
+ class URI::Generic
5
+
6
+ # Return the domain.
7
+ def domain; host[/[a-z\d-]+\.[a-z]{2,}(\.[a-z]{2})?$/].downcase; end
8
+
9
+ # Return the path and query string.
10
+ def path_query; path + (query ? "?#{query}" : ''); end
11
+
12
+ end
@@ -0,0 +1,84 @@
1
+ require 'net/http'
2
+ require 'net/https'
3
+ require 'open-uri'
4
+ require 'uri'
5
+
6
+ require 'rubygems'
7
+ require 'hpricot'
8
+ require 'htmlentities'
9
+ require 'iconv'
10
+
11
+ module Murlsh
12
+
13
+ # URI mixin.
14
+ module UriAsk
15
+
16
+ # Get the content type.
17
+ #
18
+ # Options:
19
+ # * :failproof - if true hide all exceptions and return empty string on failure
20
+ # * :headers - hash of headers to send in request
21
+ def content_type(options={})
22
+ return @content_type if defined?(@content_type)
23
+ options[:headers] = default_headers.merge(options.fetch(:headers, {}))
24
+
25
+ @content_type = ''
26
+ Murlsh::failproof(options) do
27
+ # try head first to save bandwidth
28
+ http = Net::HTTP.new(host, port)
29
+ http.use_ssl = (scheme == 'https')
30
+
31
+ resp = http.request_head(path_query, options[:headers])
32
+ @content_type = case resp
33
+ when Net::HTTPSuccess then resp['content-type']
34
+ else self.open(options[:headers]) { |f| f.content_type }
35
+ end
36
+ end
37
+ @content_type
38
+ end
39
+
40
+ # Get the HTML title.
41
+ #
42
+ # Options:
43
+ # * :failproof - if true hide all exceptions and return empty string on failure
44
+ # * :headers - hash of headers to send in request
45
+ def title(options={})
46
+ return @title if defined?(@title)
47
+ options[:headers] = default_headers.merge(options.fetch(:headers, {}))
48
+
49
+ @title = to_s
50
+ if might_have_title?(options)
51
+ Murlsh::failproof(options) do
52
+ self.open(options[:headers]) do |f|
53
+ doc = Hpricot(f).extend(Murlsh::Doc)
54
+
55
+ @title = HTMLEntities.new.decode(Iconv.conv('utf-8',
56
+ doc.charset || f.charset, doc.title))
57
+ end
58
+ end
59
+ end
60
+ @title
61
+ end
62
+
63
+ # Default headers sent with the request.
64
+ def default_headers
65
+ result = {
66
+ 'User-Agent' =>
67
+ 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
68
+ }
69
+ if (host || '')[/^www\.nytimes\.com/]
70
+ result['Referer'] = 'http://news.google.com/'
71
+ end
72
+
73
+ result
74
+ end
75
+
76
+ # Return true if the content type is likely to have a title that can be
77
+ # parsed.
78
+ def might_have_title?(options={})
79
+ content_type(options)[/^text\/html/]
80
+ end
81
+
82
+ end
83
+
84
+ end
data/lib/murlsh/url.rb CHANGED
@@ -3,10 +3,6 @@ require 'active_record'
3
3
 
4
4
  require 'uri'
5
5
 
6
- module URI
7
- def domain; host[/[a-z\d-]+\.[a-z]{2,}(\.[a-z]{2})?$/].downcase; end
8
- end
9
-
10
6
  module Murlsh
11
7
 
12
8
  # URL ActiveRecord.
@@ -17,6 +13,10 @@ module Murlsh
17
13
  read_attribute(:title) || read_attribute(:url) || 'title missing'
18
14
  end
19
15
 
16
+ # Title with whitespace compressed and leading and trailing whitespace
17
+ # stripped.
18
+ def title_stripped; title.strip.gsub(/\s+/, ' '); end
19
+
20
20
  # Return true if this url has the same author as another url.
21
21
  def same_author?(other)
22
22
  other and other.email and other.name and
@@ -54,15 +54,14 @@ module Murlsh
54
54
  div(mu.name, :class => 'name') if mu.name
55
55
  end
56
56
 
57
- a(mu.title.strip.gsub(/\s+/, ' '), :href => mu.url)
57
+ a(mu.title_stripped, :href => mu.url)
58
58
 
59
59
  mu.hostrec do |hostrec|
60
- text!(' ')
61
- span(hostrec, :class => 'host')
60
+ self.sub(" [#{hostrec}]", :class => 'host')
62
61
  end
63
62
  mu.viarec do |via|
64
63
  span(:class => 'via') {
65
- text!(' (via '); a(via.domain, :href => via); text!(')')
64
+ text!(' via '); a(via.domain, :href => via)
66
65
  }
67
66
  end
68
67
  span(", #{mu.time.fuzzy}", :class => 'date') if
@@ -115,14 +114,9 @@ module Murlsh
115
114
  # Search form builder.
116
115
  def search_form
117
116
  form(:action => '', :method => 'get') {
118
- value = @q
119
- Murlsh::Referrer.new(@req.referrer).search_query do |refq|
120
- re_parts = refq.split.collect { |x| Regexp.escape(x) }
121
- value = "\\b(#{re_parts.join('|')})\\b"
122
- end
123
117
  fieldset {
124
118
  input(:type => 'text', :id => 'q', :name => 'q', :size => 32,
125
- :value => value)
119
+ :value => @q)
126
120
  input(:type => 'submit', :value=> 'Regex Search')
127
121
  }
128
122
  }
data/lib/murlsh.rb CHANGED
@@ -1,14 +1,15 @@
1
+ require 'murlsh/doc'
2
+ require 'murlsh/uri'
1
3
  require 'murlsh/auth'
2
4
  require 'murlsh/dispatch'
3
- require 'murlsh/get_content_type'
4
- require 'murlsh/get_title'
5
+ require 'murlsh/failproof'
5
6
  require 'murlsh/openlock'
6
7
  require 'murlsh/plugin'
7
- require 'murlsh/referrer'
8
8
  require 'murlsh/sqlite3_adapter'
9
9
  require 'murlsh/time'
10
10
  require 'murlsh/url_server'
11
11
  require 'murlsh/url'
12
+ require 'murlsh/uri_ask'
12
13
  require 'murlsh/xhtml_response'
13
14
 
14
15
  # requiring builder before active_record blows up
data/murlsh.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{murlsh}
8
- s.version = "0.4.0"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Matthew M. Boedicker"]
12
- s.date = %q{2009-12-13}
12
+ s.date = %q{2010-01-15}
13
13
  s.default_executable = %q{murlsh}
14
14
  s.description = %q{url sharing site framework with easy adding, title lookup, atom feed, thumbnails and embedding}
15
15
  s.email = %q{matthewm@boedicker.org}
@@ -31,14 +31,15 @@ Gem::Specification.new do |s|
31
31
  "lib/murlsh/atom_feed.rb",
32
32
  "lib/murlsh/auth.rb",
33
33
  "lib/murlsh/dispatch.rb",
34
- "lib/murlsh/get_content_type.rb",
35
- "lib/murlsh/get_title.rb",
34
+ "lib/murlsh/doc.rb",
35
+ "lib/murlsh/failproof.rb",
36
36
  "lib/murlsh/markup.rb",
37
37
  "lib/murlsh/openlock.rb",
38
38
  "lib/murlsh/plugin.rb",
39
- "lib/murlsh/referrer.rb",
40
39
  "lib/murlsh/sqlite3_adapter.rb",
41
40
  "lib/murlsh/time.rb",
41
+ "lib/murlsh/uri.rb",
42
+ "lib/murlsh/uri_ask.rb",
42
43
  "lib/murlsh/url.rb",
43
44
  "lib/murlsh/url_body.rb",
44
45
  "lib/murlsh/url_server.rb",
@@ -50,17 +51,15 @@ Gem::Specification.new do |s|
50
51
  "plugins/update_feed.rb",
51
52
  "public/css/jquery.jgrowl.css",
52
53
  "public/css/screen.css",
53
- "public/js/jquery-1.3.2.min.js",
54
+ "public/js/jquery-1.4.min.js",
54
55
  "public/js/jquery.cookie.js",
55
56
  "public/js/jquery.jgrowl_compressed.js",
56
57
  "public/js/js.js",
57
58
  "public/swf/player_mp3_mini.swf",
59
+ "test/atom_feed_test.rb",
58
60
  "test/auth_test.rb",
59
- "test/get_charset_test.rb",
60
- "test/get_content_type_test.rb",
61
- "test/get_title_test.rb",
62
61
  "test/markup_test.rb",
63
- "test/referrer_test.rb",
62
+ "test/uri_ask_test.rb",
64
63
  "test/xhtml_response_test.rb"
65
64
  ]
66
65
  s.homepage = %q{http://github.com/mmb/murlsh}
@@ -70,12 +69,10 @@ Gem::Specification.new do |s|
70
69
  s.summary = %q{url sharing site framework}
71
70
  s.test_files = [
72
71
  "test/xhtml_response_test.rb",
72
+ "test/uri_ask_test.rb",
73
73
  "test/markup_test.rb",
74
- "test/referrer_test.rb",
75
- "test/get_charset_test.rb",
76
- "test/get_content_type_test.rb",
77
- "test/auth_test.rb",
78
- "test/get_title_test.rb"
74
+ "test/atom_feed_test.rb",
75
+ "test/auth_test.rb"
79
76
  ]
80
77
 
81
78
  if s.respond_to? :specification_version then
@@ -8,8 +8,9 @@ module Murlsh
8
8
  Hook = 'add_pre'
9
9
 
10
10
  def self.run(url, config)
11
- url.content_type = Murlsh.get_content_type(url.url)
12
- url.title = Murlsh.get_title(url.url, :content_type => url.content_type)
11
+ ask = URI(url.url).extend(Murlsh::UriAsk)
12
+ url.content_type = ask.content_type
13
+ url.title = ask.title
13
14
  end
14
15
 
15
16
  end
@@ -2,6 +2,8 @@
2
2
  div.jGrowl {
3
3
  padding: 10px;
4
4
  z-index: 9999;
5
+ color: #fff;
6
+ font-size: 12px;
5
7
  }
6
8
 
7
9
  /** Special IE6 Style Positioning **/
@@ -76,16 +78,16 @@ div.center div.jGrowl-notification, div.center div.jGrowl-closer {
76
78
 
77
79
  div.jGrowl div.jGrowl-notification, div.jGrowl div.jGrowl-closer {
78
80
  background-color: #000;
79
- color: #fff;
80
81
  opacity: .85;
81
- filter: alpha(opacity = 85);
82
+ -ms-filter: "progid:DXImageTransform.Microsoft.Alpha(Opacity=85)";
83
+ filter: progid:DXImageTransform.Microsoft.Alpha(Opacity=85);
82
84
  zoom: 1;
83
85
  width: 235px;
84
86
  padding: 10px;
85
87
  margin-top: 5px;
86
88
  margin-bottom: 5px;
87
89
  font-family: Tahoma, Arial, Helvetica, sans-serif;
88
- font-size: 12px;
90
+ font-size: 1em;
89
91
  text-align: left;
90
92
  display: none;
91
93
  -moz-border-radius: 5px;
@@ -98,23 +100,22 @@ div.jGrowl div.jGrowl-notification {
98
100
 
99
101
  div.jGrowl div.jGrowl-notification div.header {
100
102
  font-weight: bold;
101
- font-size: 10px;
103
+ font-size: .85em;
102
104
  }
103
105
 
104
106
  div.jGrowl div.jGrowl-notification div.close {
105
107
  z-index: 99;
106
108
  float: right;
107
109
  font-weight: bold;
108
- font-size: 12px;
110
+ font-size: 1em;
109
111
  cursor: pointer;
110
112
  }
111
113
 
112
114
  div.jGrowl div.jGrowl-closer {
113
- height: 15px;
114
115
  padding-top: 4px;
115
116
  padding-bottom: 4px;
116
117
  cursor: pointer;
117
- font-size: 11px;
118
+ font-size: .9em;
118
119
  font-weight: bold;
119
120
  text-align: center;
120
121
  }
@@ -51,11 +51,19 @@ img.thumb, li object {
51
51
  margin-right : 10px;
52
52
  }
53
53
 
54
- span.host {
54
+ sub.host {
55
55
  color : #808080;
56
56
  font-family : monospace;
57
57
  }
58
58
 
59
+ span.via {
60
+ font-size : 0.75em;
61
+ }
62
+
63
+ span.via:before {
64
+ content : " \2190 \ ";
65
+ }
66
+
59
67
  fieldset {
60
68
  border : 0;
61
69
  margin : 0;
@@ -121,7 +129,7 @@ div.jGrowl div.jGrowl-closer {
121
129
  width : 130px;
122
130
  }
123
131
 
124
- input#url {
132
+ input#url, input#via {
125
133
  width : 160px;
126
134
  }
127
135