murlsh 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -21,7 +21,8 @@ config = YAML.load_file('config.yaml')
21
21
 
22
22
  desc "Test remote content type fetch for a URL and show errors."
23
23
  task :content_type, :url do |t, args|
24
- puts Murlsh.get_content_type(args.url, :failproof => false, :debug => STDOUT)
24
+ puts URI(args.url).extend(Murlsh::UriAsk).content_type(:failproof => false,
25
+ :debug => STDOUT)
25
26
  end
26
27
 
27
28
  namespace :db do
@@ -101,7 +102,8 @@ end
101
102
 
102
103
  desc "Test remote title fetch for a URL and show errors."
103
104
  task :title, :url do |t, args|
104
- puts Murlsh.get_title(args.url, :failproof => false, :debug => STDOUT)
105
+ puts URI(args.url).extend(Murlsh::UriAsk).title(:failproof => false,
106
+ :debug => STDOUT)
105
107
  end
106
108
 
107
109
  desc 'Try to fetch the title for a url and update it in the database.'
@@ -111,7 +113,7 @@ task :title_fetch, :url_id do |t, args|
111
113
  url = Murlsh::Url.find(args.url_id)
112
114
  puts "Url: #{url.url}"
113
115
  puts "Previous title: #{url.title}"
114
- url.title = Murlsh.get_title(url.url, :failproof => false)
116
+ url.title = URI(url.url).extend(Murlsh::UriAsk).title(:failproof => false)
115
117
  url.save
116
118
  puts "\nNew title: #{url.title}"
117
119
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.5.0
data/config.yaml CHANGED
@@ -13,7 +13,7 @@ css_files:
13
13
  - css/jquery.jgrowl.css
14
14
  - css/screen.css
15
15
  js_files:
16
- - js/jquery-1.3.2.min.js
16
+ - js/jquery-1.4.min.js
17
17
  - js/jquery.cookie.js
18
18
  - js/jquery.jgrowl_compressed.js
19
19
  - js/js.js
@@ -21,18 +21,9 @@ module Murlsh
21
21
  @filename = options[:filename]
22
22
  @title = options[:title]
23
23
 
24
- setup_id_fields
25
- end
26
-
27
- # Set up fields to use for building item ids.
28
- def setup_id_fields
29
- uri_parsed = URI(@root_url)
30
-
31
- m = uri_parsed.host.match(/^(.*?)\.?([^.]+\.[^.]+)$/)
24
+ root_uri = URI(@root_url)
32
25
 
33
- @host, @domain = (m ? m.captures : [uri_parsed.host, ''])
34
-
35
- @path = uri_parsed.path
26
+ @host, @domain, @path = root_uri.host, root_uri.domain, root_uri.path
36
27
  end
37
28
 
38
29
  # Generate the feed and write it to the filesystem with locking.
@@ -54,16 +45,16 @@ module Murlsh
54
45
  entries.each do |mu|
55
46
  xm.entry {
56
47
  xm.author { xm.name(mu.name) }
57
- xm.title(mu.title)
48
+ xm.title(mu.title_stripped)
58
49
  xm.id(entry_id(mu))
59
- xm.summary(mu.title)
50
+ xm.summary(mu.title_stripped)
60
51
  xm.updated(mu.time.xmlschema)
61
52
  xm.link(:href => mu.url)
62
53
  enclosure(xm, mu)
54
+ via(xm, mu)
63
55
  }
64
56
  end
65
57
  }
66
- xm
67
58
  end
68
59
 
69
60
  # Build the entry's id.
@@ -77,6 +68,14 @@ module Murlsh
77
68
  :title => 'Full-size') if mu.is_image?
78
69
  end
79
70
 
71
# Add a rel="via" link to the entry when the url records where it was found.
#
# xm - builder for the entry; receives one link(attributes) call.
# mu - url record; mu.via is the source url string, or nil when unknown.
#
# The link title is the domain of the via url and is left out when no
# domain can be extracted. The via link is optional, so ordinary errors
# (for example an unparsable via url) are swallowed and the link is simply
# omitted. Only StandardError is rescued so that interrupts and exit
# requests still propagate.
def via(xm, mu)
  return unless mu.via

  attrs = { :rel => 'via', :type => 'text/html', :href => mu.via }
  domain = URI(mu.via).domain
  attrs[:title] = domain if domain
  xm.link(attrs)
rescue StandardError
  # best effort: an entry without a via link is still a valid entry
  nil
end
78
+
80
79
  end
81
80
 
82
81
  end
data/lib/murlsh/doc.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+
4
module Murlsh

  # Hpricot::Doc mixin.
  #
  # The object this is mixed into must respond to +at+ (first element
  # matching an expression) and +/+ (all elements matching an expression).
  module Doc

    # Get the character set of the document.
    #
    # Looks at the content attribute of the content-type meta http-equiv
    # element. Returns the charset name as it appears in the document, or
    # nil if none is declared.
    def charset
      %w{content-type Content-Type}.each do |http_equiv|
        meta = at("meta[@http-equiv='#{http_equiv}']")
        next if meta.nil?

        content = meta['content']
        next if content.nil?

        # the parameter name is matched case-insensitively so that
        # "CHARSET=utf-8" and "Charset=utf-8" are also recognized
        found = content[/charset=([\w.:-]+)/i, 1]
        return found if found
      end
      nil
    end

    # Find the title of the document.
    #
    # Tries progressively looser locations for the title element and
    # returns the inner html of the first one found, or nil if the document
    # has no title element.
    def title
      %w{//html/head/title //head/title //html/title //title}.each do |xpath|
        # search once per path and reuse the result
        element = (self / xpath).first
        return element.inner_html unless element.nil?
      end
      nil
    end

  end

end
@@ -0,0 +1,15 @@
1
+
2
module Murlsh

  module_function

  # Run the block, swallowing errors unless options[:failproof] is false.
  #
  # options - hash; only :failproof is read (defaults to true).
  #
  # Returns the value of the block, or nil if an error was swallowed.
  # Re-raises the error when options[:failproof] is false.
  #
  # Only StandardError is rescued so that SystemExit, Interrupt and other
  # non-application exceptions always propagate.
  # NOTE(review): on very old Rubies where Timeout::Error is not a
  # StandardError, timeouts would no longer be swallowed - confirm the
  # minimum supported Ruby version.
  def failproof(options={})
    yield
  rescue StandardError
    raise unless options.fetch(:failproof, true)
    nil
  end

end
data/lib/murlsh/uri.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'uri'
2
+
3
+ # Extra methods added to URI class.
4
class URI::Generic

  # Return the domain.
  #
  # This is a heuristic on the host name: the last two labels, or the last
  # three when the host ends in a two-letter label (for example
  # "www.example.com" gives "example.com" and "news.bbc.co.uk" gives
  # "bbc.co.uk"). The result is always lowercase.
  #
  # Returns nil when there is no host or the host does not look like a
  # domain name (for example "localhost" or an IP address).
  def domain
    # lowercase before matching: the pattern only accepts lowercase letters
    host.to_s.downcase[/[a-z\d-]+\.[a-z]{2,}(\.[a-z]{2})?\z/]
  end

  # Return the path and query string.
  #
  # The query string is appended after a "?" only when one is present. A
  # missing path is treated as empty.
  def path_query
    query ? "#{path}?#{query}" : path.to_s
  end

end
@@ -0,0 +1,84 @@
1
+ require 'net/http'
2
+ require 'net/https'
3
+ require 'open-uri'
4
+ require 'uri'
5
+
6
+ require 'rubygems'
7
+ require 'hpricot'
8
+ require 'htmlentities'
9
+ require 'iconv'
10
+
11
module Murlsh

  # URI mixin.
  #
  # Adds methods that ask the remote server about the resource at the URI.
  # Results are cached per object in @content_type and @title, so each is
  # fetched at most once.
  module UriAsk

    # Get the content type.
    #
    # Options:
    # * :failproof - if true hide all exceptions and return empty string on failure
    # * :headers - hash of headers to send in request
    #
    # The options hash passed in is not modified. Always returns a string;
    # a successful response without a Content-Type header gives ''.
    def content_type(options={})
      return @content_type if defined?(@content_type)
      headers = default_headers.merge(options.fetch(:headers, {}))

      @content_type = ''
      Murlsh::failproof(options) do
        # try head first to save bandwidth
        http = Net::HTTP.new(host, port)
        http.use_ssl = (scheme == 'https')

        resp = http.request_head(path_query, headers)
        fetched = case resp
          when Net::HTTPSuccess then resp['content-type']
          else self.open(headers) { |f| f.content_type }
        end
        # the header may be absent; never cache nil
        @content_type = fetched.to_s
      end
      @content_type
    end

    # Get the HTML title.
    #
    # Options:
    # * :failproof - if true hide all exceptions and return the url itself on failure
    # * :headers - hash of headers to send in request
    #
    # The url itself is also returned when the content type is not HTML.
    # The options hash passed in is not modified.
    def title(options={})
      return @title if defined?(@title)
      # work on a copy so the caller's hash is left untouched
      opts = options.merge(
        :headers => default_headers.merge(options.fetch(:headers, {})))

      @title = to_s
      if might_have_title?(opts)
        Murlsh::failproof(opts) do
          self.open(opts[:headers]) do |f|
            doc = Hpricot(f).extend(Murlsh::Doc)

            # prefer the charset declared in the document over the one
            # from the response
            @title = HTMLEntities.new.decode(Iconv.conv('utf-8',
              doc.charset || f.charset, doc.title))
          end
        end
      end
      @title
    end

    # Default headers sent with the request.
    #
    # Returns a new hash on every call.
    def default_headers
      headers = {
        'User-Agent' =>
          'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
      }
      # requests to www.nytimes.com are sent with a Google News referer
      if (host || '')[/^www\.nytimes\.com/]
        headers['Referer'] = 'http://news.google.com/'
      end

      headers
    end

    # Return true if the content type is likely to have a title that can be
    # parsed.
    #
    # The actual return value is the matched "text/html" prefix or nil.
    def might_have_title?(options={})
      content_type(options).to_s[/^text\/html/]
    end

  end

end
data/lib/murlsh/url.rb CHANGED
@@ -3,10 +3,6 @@ require 'active_record'
3
3
 
4
4
  require 'uri'
5
5
 
6
- module URI
7
- def domain; host[/[a-z\d-]+\.[a-z]{2,}(\.[a-z]{2})?$/].downcase; end
8
- end
9
-
10
6
  module Murlsh
11
7
 
12
8
  # URL ActiveRecord.
@@ -17,6 +13,10 @@ module Murlsh
17
13
  read_attribute(:title) || read_attribute(:url) || 'title missing'
18
14
  end
19
15
 
16
# Return the title with leading and trailing whitespace removed and each
# remaining run of whitespace collapsed to a single space.
def title_stripped
  trimmed = title.strip
  trimmed.gsub(/\s+/, ' ')
end
19
+
20
20
  # Return true if this url has the same author as another url.
21
21
  def same_author?(other)
22
22
  other and other.email and other.name and
@@ -54,15 +54,14 @@ module Murlsh
54
54
  div(mu.name, :class => 'name') if mu.name
55
55
  end
56
56
 
57
- a(mu.title.strip.gsub(/\s+/, ' '), :href => mu.url)
57
+ a(mu.title_stripped, :href => mu.url)
58
58
 
59
59
  mu.hostrec do |hostrec|
60
- text!(' ')
61
- span(hostrec, :class => 'host')
60
+ self.sub(" [#{hostrec}]", :class => 'host')
62
61
  end
63
62
  mu.viarec do |via|
64
63
  span(:class => 'via') {
65
- text!(' (via '); a(via.domain, :href => via); text!(')')
64
+ text!(' via '); a(via.domain, :href => via)
66
65
  }
67
66
  end
68
67
  span(", #{mu.time.fuzzy}", :class => 'date') if
@@ -115,14 +114,9 @@ module Murlsh
115
114
  # Search form builder.
116
115
  def search_form
117
116
  form(:action => '', :method => 'get') {
118
- value = @q
119
- Murlsh::Referrer.new(@req.referrer).search_query do |refq|
120
- re_parts = refq.split.collect { |x| Regexp.escape(x) }
121
- value = "\\b(#{re_parts.join('|')})\\b"
122
- end
123
117
  fieldset {
124
118
  input(:type => 'text', :id => 'q', :name => 'q', :size => 32,
125
- :value => value)
119
+ :value => @q)
126
120
  input(:type => 'submit', :value=> 'Regex Search')
127
121
  }
128
122
  }
data/lib/murlsh.rb CHANGED
@@ -1,14 +1,15 @@
1
+ require 'murlsh/doc'
2
+ require 'murlsh/uri'
1
3
  require 'murlsh/auth'
2
4
  require 'murlsh/dispatch'
3
- require 'murlsh/get_content_type'
4
- require 'murlsh/get_title'
5
+ require 'murlsh/failproof'
5
6
  require 'murlsh/openlock'
6
7
  require 'murlsh/plugin'
7
- require 'murlsh/referrer'
8
8
  require 'murlsh/sqlite3_adapter'
9
9
  require 'murlsh/time'
10
10
  require 'murlsh/url_server'
11
11
  require 'murlsh/url'
12
+ require 'murlsh/uri_ask'
12
13
  require 'murlsh/xhtml_response'
13
14
 
14
15
  # requiring builder before active_record blows up
data/murlsh.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{murlsh}
8
- s.version = "0.4.0"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Matthew M. Boedicker"]
12
- s.date = %q{2009-12-13}
12
+ s.date = %q{2010-01-15}
13
13
  s.default_executable = %q{murlsh}
14
14
  s.description = %q{url sharing site framework with easy adding, title lookup, atom feed, thumbnails and embedding}
15
15
  s.email = %q{matthewm@boedicker.org}
@@ -31,14 +31,15 @@ Gem::Specification.new do |s|
31
31
  "lib/murlsh/atom_feed.rb",
32
32
  "lib/murlsh/auth.rb",
33
33
  "lib/murlsh/dispatch.rb",
34
- "lib/murlsh/get_content_type.rb",
35
- "lib/murlsh/get_title.rb",
34
+ "lib/murlsh/doc.rb",
35
+ "lib/murlsh/failproof.rb",
36
36
  "lib/murlsh/markup.rb",
37
37
  "lib/murlsh/openlock.rb",
38
38
  "lib/murlsh/plugin.rb",
39
- "lib/murlsh/referrer.rb",
40
39
  "lib/murlsh/sqlite3_adapter.rb",
41
40
  "lib/murlsh/time.rb",
41
+ "lib/murlsh/uri.rb",
42
+ "lib/murlsh/uri_ask.rb",
42
43
  "lib/murlsh/url.rb",
43
44
  "lib/murlsh/url_body.rb",
44
45
  "lib/murlsh/url_server.rb",
@@ -50,17 +51,15 @@ Gem::Specification.new do |s|
50
51
  "plugins/update_feed.rb",
51
52
  "public/css/jquery.jgrowl.css",
52
53
  "public/css/screen.css",
53
- "public/js/jquery-1.3.2.min.js",
54
+ "public/js/jquery-1.4.min.js",
54
55
  "public/js/jquery.cookie.js",
55
56
  "public/js/jquery.jgrowl_compressed.js",
56
57
  "public/js/js.js",
57
58
  "public/swf/player_mp3_mini.swf",
59
+ "test/atom_feed_test.rb",
58
60
  "test/auth_test.rb",
59
- "test/get_charset_test.rb",
60
- "test/get_content_type_test.rb",
61
- "test/get_title_test.rb",
62
61
  "test/markup_test.rb",
63
- "test/referrer_test.rb",
62
+ "test/uri_ask_test.rb",
64
63
  "test/xhtml_response_test.rb"
65
64
  ]
66
65
  s.homepage = %q{http://github.com/mmb/murlsh}
@@ -70,12 +69,10 @@ Gem::Specification.new do |s|
70
69
  s.summary = %q{url sharing site framework}
71
70
  s.test_files = [
72
71
  "test/xhtml_response_test.rb",
72
+ "test/uri_ask_test.rb",
73
73
  "test/markup_test.rb",
74
- "test/referrer_test.rb",
75
- "test/get_charset_test.rb",
76
- "test/get_content_type_test.rb",
77
- "test/auth_test.rb",
78
- "test/get_title_test.rb"
74
+ "test/atom_feed_test.rb",
75
+ "test/auth_test.rb"
79
76
  ]
80
77
 
81
78
  if s.respond_to? :specification_version then
@@ -8,8 +8,9 @@ module Murlsh
8
8
  Hook = 'add_pre'
9
9
 
10
10
  def self.run(url, config)
11
- url.content_type = Murlsh.get_content_type(url.url)
12
- url.title = Murlsh.get_title(url.url, :content_type => url.content_type)
11
+ ask = URI(url.url).extend(Murlsh::UriAsk)
12
+ url.content_type = ask.content_type
13
+ url.title = ask.title
13
14
  end
14
15
 
15
16
  end
@@ -2,6 +2,8 @@
2
2
  div.jGrowl {
3
3
  padding: 10px;
4
4
  z-index: 9999;
5
+ color: #fff;
6
+ font-size: 12px;
5
7
  }
6
8
 
7
9
  /** Special IE6 Style Positioning **/
@@ -76,16 +78,16 @@ div.center div.jGrowl-notification, div.center div.jGrowl-closer {
76
78
 
77
79
  div.jGrowl div.jGrowl-notification, div.jGrowl div.jGrowl-closer {
78
80
  background-color: #000;
79
- color: #fff;
80
81
  opacity: .85;
81
- filter: alpha(opacity = 85);
82
+ -ms-filter: "progid:DXImageTransform.Microsoft.Alpha(Opacity=85)";
83
+ filter: progid:DXImageTransform.Microsoft.Alpha(Opacity=85);
82
84
  zoom: 1;
83
85
  width: 235px;
84
86
  padding: 10px;
85
87
  margin-top: 5px;
86
88
  margin-bottom: 5px;
87
89
  font-family: Tahoma, Arial, Helvetica, sans-serif;
88
- font-size: 12px;
90
+ font-size: 1em;
89
91
  text-align: left;
90
92
  display: none;
91
93
  -moz-border-radius: 5px;
@@ -98,23 +100,22 @@ div.jGrowl div.jGrowl-notification {
98
100
 
99
101
  div.jGrowl div.jGrowl-notification div.header {
100
102
  font-weight: bold;
101
- font-size: 10px;
103
+ font-size: .85em;
102
104
  }
103
105
 
104
106
  div.jGrowl div.jGrowl-notification div.close {
105
107
  z-index: 99;
106
108
  float: right;
107
109
  font-weight: bold;
108
- font-size: 12px;
110
+ font-size: 1em;
109
111
  cursor: pointer;
110
112
  }
111
113
 
112
114
  div.jGrowl div.jGrowl-closer {
113
- height: 15px;
114
115
  padding-top: 4px;
115
116
  padding-bottom: 4px;
116
117
  cursor: pointer;
117
- font-size: 11px;
118
+ font-size: .9em;
118
119
  font-weight: bold;
119
120
  text-align: center;
120
121
  }
@@ -51,11 +51,19 @@ img.thumb, li object {
51
51
  margin-right : 10px;
52
52
  }
53
53
 
54
- span.host {
54
+ sub.host {
55
55
  color : #808080;
56
56
  font-family : monospace;
57
57
  }
58
58
 
59
+ span.via {
60
+ font-size : 0.75em;
61
+ }
62
+
63
+ span.via:before {
64
+ content : " \2190 \ ";
65
+ }
66
+
59
67
  fieldset {
60
68
  border : 0;
61
69
  margin : 0;
@@ -121,7 +129,7 @@ div.jGrowl div.jGrowl-closer {
121
129
  width : 130px;
122
130
  }
123
131
 
124
- input#url {
132
+ input#url, input#via {
125
133
  width : 160px;
126
134
  }
127
135