object-scraper 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest ADDED
@@ -0,0 +1,10 @@
1
+ Manifest
2
+ README.rdoc
3
+ Rakefile
4
+ lib/object-scraper.rb
5
+ lib/object-scraper/scraper.rb
6
+ object-scraper.gemspec
7
+ spec/data/twitter.html
8
+ spec/object-scraper/scraper_spec.rb
9
+ spec/spec.opts
10
+ spec/spec_helper.rb
data/README.rdoc ADDED
@@ -0,0 +1,53 @@
1
+ = Object Scraper
2
+
3
+ == Description
4
+
5
+ Object scraper is a thin wrapper for hpricot to enable recipe-like
6
+ extraction of ruby objects from various web sites.
7
+
8
+ == Install
9
+
10
+ === Gem
11
+
12
+ gem install object-scraper --source http://gemcutter.org
13
+
14
+ === Rails
15
+
16
+ config.gem 'object-scraper', :source => 'http://gemcutter.org'
17
+
18
+ == Example
19
+
20
+ class Entry < Object
21
+ attr_accessor :text, :date
22
+ end
23
+
24
+ uri = "http://twitter.com/twitter"
25
+ pattern = ".status"
26
+
27
+ Scraper.define(:twitter, :class => :entry, :source => uri, :node => pattern) do |s|
28
+ s.text { |node| node.at(".entry-content").inner_html }
29
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
30
+ end
31
+
32
+ @objects = Scraper.parse(:twitter)
33
+
34
+ == Advanced Example
35
+
36
+ It is possible to use other existing HTML parsers instead of hpricot.
37
+ Just replace the corresponding proc object.
38
+
39
+ require 'nokogiri'
40
+ Scraper.scrape_source_with = Proc.new { |source| Nokogiri::HTML(source) }
41
+
42
+ Scraper.define(:twitter, :class => :entry, :source => uri, :node => pattern) do |s|
43
+ # initialize your objects here accordingly
44
+ end
45
+
46
+ == Rails
47
+
48
+ All scraper definitions sitting in RAILS_ROOT/scrapers will be taken into account
49
+ automatically when you use object-scraper as a gem in your rails project.
50
+
51
+ == Author
52
+
53
+ - Maintained by {Enrico Genauck}[mailto:kontakt@enricogenauck.de]
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
# Gem packaging metadata handed to Echoe: gem name and version first, then
# the descriptive fields, dependencies and the paths to leave out.
Echoe.new('object-scraper', '0.0.2') do |config|
  config.author = "Enrico Genauck"
  config.email = "kontakt@enricogenauck.de"
  config.url = "http://github.com/enricogenauck/object-scraper"
  config.summary = "Recipe like object extraction from HTML sources"
  config.description = "Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites."
  config.runtime_dependencies = ["hpricot >=0.8.2"]
  config.development_dependencies = []
  config.ignore_pattern = ["tmp/*", "script/*"]
end

# Load every project-local rake task file, in sorted order.
tasks_dir = "#{File.dirname(__FILE__)}/tasks"
Dir["#{tasks_dir}/*.rake"].sort.each { |task_file| load task_file }
@@ -0,0 +1,128 @@
1
# A Scraper is a named recipe that turns every node matched in a source
# document into an instance of a target class.
#
# Recipes are registered with Scraper.define and executed with Scraper.parse.
# Inside the definition block, any unknown method called on the yielded
# scraper is forwarded as an attribute assignment to the object that is
# currently being built (see #method_missing).
class Scraper

  # Raised when a scraper is defined with the same name as a previously-defined scraper.
  class DuplicateDefinitionError < RuntimeError
  end

  # Option keys accepted by Scraper.define / Scraper.new. All of them are required.
  VALID_OPTIONS = [:class, :source, :node].freeze

  # Matches sources that start with a URI scheme, e.g. "http://".
  REMOTE_SOURCE = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

  class << self
    # Hash of all defined scrapers, keyed by name (Symbol).
    attr_accessor :scrapers
    # Proc that receives the opened source and returns a document that
    # responds to +search+. Defaults to Hpricot.
    attr_accessor :scrape_source_with
    # Paths (without ".rb") searched by Scraper.find_definitions.
    attr_accessor :definition_file_paths
  end

  self.scrapers = {}
  self.scrape_source_with = Proc.new { |source| Hpricot(source) }
  self.definition_file_paths = %w(scrapers)

  attr_reader :scraper_source, :scraper_node

  # Registers a new scraper under +name+.
  #
  # +options+ must contain :class, :source and :node. The block is called
  # once per matched node with the scraper itself as argument.
  #
  # The name is normalized to a Symbol so that it is stored under the same
  # key that Scraper.get / Scraper.parse use for the lookup.
  #
  # Raises ArgumentError for invalid options and DuplicateDefinitionError
  # when the name is already taken. Returns the new Scraper instance.
  def self.define(name, options = {}, &block)
    instance = Scraper.new(name, options, &block)
    key = name.to_sym

    if scrapers[key]
      raise DuplicateDefinitionError, "Scraper already defined: #{name}"
    end

    scrapers[key] = instance
  end

  def initialize(name, options = {}, &block) #:nodoc:
    assert_valid_options(options)
    @objects = []
    @class = class_for(options[:class])
    @scraper_source = options[:source]
    @scraper_node = options[:node]
    @block = block
  end

  # Returns the scraper registered under +name+.
  # Raises ArgumentError when there is none.
  def self.get(name)
    scraper_by_name(name)
  end

  # Runs the scraper registered under +name+ and returns the extracted objects.
  # Raises ArgumentError when there is no such scraper.
  def self.parse(name)
    scraper_by_name(name).parse
  end

  # Opens the source, parses it with Scraper.scrape_source_with and builds
  # one object of the target class per node matching the node selector.
  #
  # Every call starts with a fresh result array, so parsing twice does not
  # return the objects of the first run again.
  def parse
    doc = open_source { |f| Scraper.scrape_source_with.call(f) }
    @objects = []
    doc.search(@scraper_node).each do |n|
      @current_node = n
      @current_object = @class.new
      @objects << @current_object
      # A definition without a block simply yields untouched objects.
      @block.call(self) if @block
    end
    @objects
  ensure
    # Outside of a parse run there is nothing to assign attributes to.
    @current_node = nil
    @current_object = nil
  end

  # Looks up a scraper by name (String or Symbol).
  # Raises ArgumentError when no scraper with that name is defined.
  def self.scraper_by_name(name)
    scrapers[name.to_sym] || raise(ArgumentError, "No such scraper: #{name}")
  end

  # Definition DSL: <tt>s.attr { |node| ... }</tt> assigns the block result
  # (the block receives the current node) and <tt>s.attr(value)</tt> assigns
  # +value+ to <tt>attr=</tt> of the object currently being built.
  #
  # Outside of a parse run the call is handed to +super+, which raises the
  # regular NoMethodError for the scraper itself.
  def method_missing(symbol, *args, &block)
    return super if @current_object.nil?

    value = block ? block.call(@current_node) : args.first
    @current_object.send("#{symbol}=", value)
  end

  # Reports the attribute writers of the object currently being built.
  def respond_to_missing?(symbol, include_private = false)
    (!@current_object.nil? && @current_object.respond_to?("#{symbol}=", include_private)) || super
  end

  # Requires "<path>.rb" and every "*.rb" file inside the directory "<path>"
  # for each entry of Scraper.definition_file_paths.
  #
  # Paths are expanded before requiring because the current directory is not
  # part of the load path on Ruby 1.9.2 and later.
  def self.find_definitions
    definition_file_paths.each do |path|
      single_file = "#{path}.rb"
      require File.expand_path(single_file) if File.exist?(single_file)

      if File.directory? path
        Dir[File.join(path, '*.rb')].sort.each do |file|
          require File.expand_path(file)
        end
      end
    end
  end

  private

  # Opens the scraper source and yields the opened object to the block.
  #
  # Objects that respond to +open+ open themselves. Strings with a URI scheme
  # go through URI.open when open-uri provides it, because Kernel#open no
  # longer fetches URLs on Ruby 3.0+. Everything else uses Kernel#open.
  def open_source(&block)
    source = @scraper_source
    if source.respond_to?(:open)
      source.open(&block)
    elsif defined?(URI) && URI.respond_to?(:open) && source.to_s =~ REMOTE_SOURCE
      URI.open(source.to_s, &block)
    else
      open(source, &block)
    end
  end

  # Resolves a class given as Symbol/String (e.g. :entry or "admin/user")
  # to the constant; anything else is returned unchanged.
  def class_for(class_or_to_s)
    if class_or_to_s.respond_to?(:to_sym)
      Object.const_get(variable_name_to_class_name(class_or_to_s))
    else
      class_or_to_s
    end
  end

  # Returns a scraper name (Symbol) for a class or a name.
  def scraper_name_for(class_or_to_s)
    if class_or_to_s.respond_to?(:to_sym)
      class_or_to_s.to_sym
    else
      class_name_to_variable_name(class_or_to_s).to_sym
    end
  end

  # "Admin::UserEntry" => "admin/user_entry"
  def class_name_to_variable_name(name)
    name.to_s.gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      tr("-", "_").
      downcase
  end

  # "admin/user_entry" => "Admin::UserEntry"
  def variable_name_to_class_name(name)
    name.to_s.
      gsub(/\/(.?)/) { "::#{Regexp.last_match(1).upcase}" }.
      gsub(/(?:^|_)(.)/) { Regexp.last_match(1).upcase }
  end

  # Raises ArgumentError for unknown option keys and for every required
  # option (:class, :source, :node) that is missing.
  def assert_valid_options(options)
    invalid_keys = options.keys - VALID_OPTIONS
    unless invalid_keys.empty?
      raise ArgumentError, "Unknown arguments: #{invalid_keys.inspect}"
    end

    VALID_OPTIONS.each do |key|
      raise ArgumentError, "Missing argument: #{key.inspect}" unless options[key]
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'object-scraper/scraper'
2
+ require 'open-uri'
3
+ require 'hpricot'
4
+
5
# Shortcut for Scraper.get: returns the scraper defined under +name+.
#
# Example:
#   Scraper(:my_space)
def Scraper(name)
  Scraper.get(name)
end
12
+
13
# Load the scraper definitions. Inside Rails this is deferred until the
# application has initialized and the definitions are looked up in
# <rails root>/scrapers; everywhere else they are loaded right away from the
# default Scraper.definition_file_paths.
if defined? Rails.configuration
  Rails.configuration.after_initialize do
    # The RAILS_ROOT constant is gone in modern Rails; prefer Rails.root and
    # keep the constant only as a fallback for old versions.
    rails_root = Rails.respond_to?(:root) ? Rails.root.to_s : RAILS_ROOT
    Scraper.definition_file_paths = [File.join(rails_root, 'scrapers')]
    Scraper.find_definitions
  end
else
  Scraper.find_definitions
end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for object-scraper 0.0.2.
Gem::Specification.new do |s|
  s.name = %q{object-scraper}
  s.version = "0.0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Enrico Genauck"]
  s.date = %q{2009-12-10}
  s.description = %q{Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.}
  s.email = %q{kontakt@enricogenauck.de}
  s.extra_rdoc_files = ["README.rdoc", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb"]
  s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb", "object-scraper.gemspec", "spec/data/twitter.html", "spec/object-scraper/scraper_spec.rb", "spec/spec.opts", "spec/spec_helper.rb"]
  s.homepage = %q{http://github.com/enricogenauck/object-scraper}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Object-scraper", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Guarded so the spec still loads on a RubyGems without this legacy attribute.
  s.rubyforge_project = %q{object-scraper} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Recipe like object extraction from HTML sources}

  s.specification_version = 3 if s.respond_to? :specification_version

  # Feature-detect the dependency API instead of comparing against
  # Gem::RubyGemsVersion, a constant that current RubyGems no longer defines.
  if s.respond_to? :add_runtime_dependency
    s.add_runtime_dependency(%q<hpricot>, [">= 0.8.2"])
  else
    s.add_dependency(%q<hpricot>, [">= 0.8.2"])
  end
end
@@ -0,0 +1,731 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
+ <head>
4
+ <script type="text/javascript">
5
+ //<![CDATA[
6
+ var page={};var onCondition=function(D,C,A,B){D=D;A=A?Math.min(A,5):5;B=B||100;if(D()){C()}else{if(A>1){setTimeout(function(){onCondition(D,C,A-1,B)},B)}}};
7
+ //]]>
8
+ </script>
9
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
10
+ <meta content="en-us" http-equiv="Content-Language" />
11
+ <meta content="Always wondering what's happening. " name="description" />
12
+ <meta content="no" http-equiv="imagetoolbar" />
13
+ <meta content="width = 780" name="viewport" />
14
+ <meta content="4FTTxY4uvo0RZTMQqIyhh18HsepyJOctQ+XTOu1zsfE=" name="verify-v1" />
15
+ <meta content="1" name="page" />
16
+ <meta content="NOODP" name="robots" />
17
+ <meta content="n" name="session-loggedin" />
18
+ <meta content="twitter" name="page-user-screen_name" />
19
+ <title id="page_title">Twitter (twitter) on Twitter</title>
20
+ <link href="http://a1.twimg.com/a/1259091217/images/favicon.ico" rel="shortcut icon" type="image/x-icon" />
21
+ <link href="http://a1.twimg.com/a/1259091217/images/twitter_57.png" rel="apple-touch-icon" />
22
+ <link rel="alternate" href="http://twitter.com/statuses/user_timeline/783214.rss" title="twitter's Tweets" type="application/rss+xml" />
23
+ <link rel="alternate" href="http://twitter.com/favorites/783214.rss" title="twitter's Favorites" type="application/rss+xml" />
24
+
25
+
26
+ <link href="http://a1.twimg.com/a/1259091217/stylesheets/twitter.css?1259117012" media="screen" rel="stylesheet" type="text/css" />
27
+ <style type="text/css">
28
+
29
+ body { background: #C0DEED url('http://a3.twimg.com/a/1259091217/images/bg-clouds.png') repeat-x; }
30
+
31
+
32
+ body {
33
+ background-position: 0 0;
34
+ padding-top: 0;
35
+ }
36
+
37
+ </style>
38
+ <link href="http://a0.twimg.com/a/1259091217/stylesheets/following.css?1259117012" media="screen, projection" rel="stylesheet" type="text/css" />
39
+
40
+ </head>
41
+
42
+ <body class="account safari" id="profile">
43
+ <script type="text/javascript">
44
+ //<![CDATA[
45
+ if (window.top !== window.self) {document.write = "";window.top.location = window.self.location; setTimeout(function(){document.body.innerHTML='';},1);window.self.onload=function(evt){document.body.innerHTML='';};}
46
+ //]]>
47
+ </script>
48
+
49
+
50
+ <div id="dim-screen"></div>
51
+ <ul id="accessibility" class="offscreen">
52
+ <li><a href="#content" accesskey="0">Skip past navigation</a></li>
53
+ <li>On a mobile phone? Check out <a href="http://m.twitter.com/">m.twitter.com</a>!</li>
54
+ <li><a href="#footer" accesskey="2">Skip to navigation</a></li>
55
+ <li><a href="#signin">Skip to sign in form</a></li>
56
+ </ul>
57
+
58
+
59
+
60
+ <div id="container" class="subpage">
61
+ <span id="loader" style="display:none"><img alt="Loader" src="http://a0.twimg.com/a/1259091217/images/loader.gif" /></span>
62
+ <div id="header">
63
+ <a href="http://twitter.com/" title="Twitter / Home" accesskey="1" id="logo">
64
+ <img alt="Twitter.com" height="36" src="http://a0.twimg.com/a/1259091217/images/twitter_logo_header.png" width="155" />
65
+ </a>
66
+ <form method="post" id="sign_out_form" action="/sessions/destroy" style="display:none;">
67
+ <input name="authenticity_token" value="7e394453cc9d849cab133ccb3180e8ae4e6a0258" type="hidden" />
68
+ </form>
69
+
70
+ <ul class="top-navigation round">
71
+ <li><a href="/login" accesskey="l">Login</a></li>
72
+ <li class="signup-link"><a href="/signup">Join Twitter!</a></li>
73
+ </ul>
74
+ </div>
75
+
76
+
77
+
78
+ <div id="profilebox_outer">
79
+ <div id="profilebird"><img alt="Profile_bird" height="48" id="profilebirdimg" src="http://a0.twimg.com/a/1259091217/images/profile_bird.png" width="48" /></div>
80
+ <div id="profilebox" class="clearfix">
81
+ <div id="profiletext">
82
+ <h1>Hey there! <strong>twitter</strong> is using Twitter.</h1>
83
+ <h2>Twitter is a free service that lets you keep in touch with people through the exchange of quick, frequent answers to one simple question: What's happening? <strong>Join today</strong> to start receiving <strong>twitter's</strong> tweets.</h2>
84
+ </div>
85
+ <div id="profilebutton">
86
+ <form name="account_signup_form" id="account_signup_form" action="/signup">
87
+ <input id="follow" name="follow" type="hidden" value="twitter" />
88
+ <input class="profilesubmit" id="join" name="commit" type="submit" value="Join today!" />
89
+ </form>
90
+ <p><small>Already using Twitter<br /> from your phone? <a href="/account/complete">Click here</a>.</small></p>
91
+ </div>
92
+ </div>
93
+ </div>
94
+
95
+
96
+
97
+
98
+
99
+ <div class="content-bubble-arrow"></div>
100
+
101
+
102
+ <table cellspacing="0" class="columns">
103
+ <tbody>
104
+ <tr>
105
+ <td id="content" class="round-left column">
106
+ <div class="wrapper">
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+ <div class="profile-user">
117
+ <div id="user_783214" class="user ">
118
+ <h2 class="thumb clearfix">
119
+ <a href="/account/profile_image/twitter?hreflang=en"><img alt="" border="0" height="73" id="profile-image" src="http://a1.twimg.com/profile_images/75075164/twitter_bird_profile_bigger.png" valign="middle" width="73" /></a>
120
+ <div class="screen-name">twitter</div>
121
+ </h2>
122
+ </div>
123
+ </div>
124
+
125
+
126
+ <div class="section">
127
+
128
+ <div id="timeline_heading" style="display: none;">
129
+ <h1 id="heading"></h1>
130
+ </div>
131
+ <ol id='timeline' class='statuses'>
132
+ <li class="hentry u-twitter status latest-status" id="status_6191506635">
133
+ <span class="status-body">
134
+ <span class="entry-content">SMS delivery issues on AT&T <a href="http://bit.ly/7JFJ6H" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/7JFJ6H</a></span>
135
+ <span class="meta entry-meta">
136
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/6191506635">
137
+ <span class="published timestamp" data="{time:'Mon Nov 30 04:10:51 +0000 2009'}">about 8 hours ago</span>
138
+ </a>
139
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
140
+
141
+ </span>
142
+ </span>
143
+ </li>
144
+ <li class="hentry u-twitter status" id="status_5989297065">
145
+ <span class="status-body">
146
+ <span class="entry-content">Fixing elevated error rate on twitter.com <a href="http://bit.ly/4xRf8U" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4xRf8U</a></span>
147
+ <span class="meta entry-meta">
148
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5989297065">
149
+ <span class="published timestamp" data="{time:'Mon Nov 23 22:13:27 +0000 2009'}">2:13 PM Nov 23rd</span>
150
+ </a>
151
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
152
+
153
+ </span>
154
+ </span>
155
+ </li>
156
+ <li class="hentry u-twitter status" id="status_5875860574">
157
+ <span class="status-body">
158
+ <span class="entry-content">Abonnez-vous à @<a class="tweet-url username" href="/Twitter_FR">Twitter_FR</a>, le compte officiel de Twitter en français!</span>
159
+ <span class="meta entry-meta">
160
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5875860574">
161
+ <span class="published timestamp" data="{time:'Fri Nov 20 01:34:31 +0000 2009'}">5:34 PM Nov 19th</span>
162
+ </a>
163
+ <span>from web</span>
164
+
165
+ </span>
166
+ </span>
167
+ </li>
168
+ <li class="hentry u-twitter status" id="status_5875627626">
169
+ <span class="status-body">
170
+ <span class="entry-content">Nouvelle saveur : Twitter en Français! <a href="http://bit.ly/DeCHQ" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/DeCHQ</a></span>
171
+ <span class="meta entry-meta">
172
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5875627626">
173
+ <span class="published timestamp" data="{time:'Fri Nov 20 01:25:37 +0000 2009'}">5:25 PM Nov 19th</span>
174
+ </a>
175
+ <span>from web</span>
176
+
177
+ </span>
178
+ </span>
179
+ </li>
180
+ <li class="hentry u-twitter status" id="status_5874789939">
181
+ <span class="status-body">
182
+ <span class="entry-content">RT @<a class="tweet-url username" href="/macgill">macgill</a>: Released refreshed Twitter privacy policy <a href="http://bit.ly/kYyQ6" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/kYyQ6</a> and a new helpful trademark page <a href="http://bit.ly/2iGZgV" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/2iGZgV</a> Check </span>
183
+ <a href="http://twitter.com/twitter/status/5874789939">...</a> <span class="meta entry-meta">
184
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5874789939">
185
+ <span class="published timestamp" data="{time:'Fri Nov 20 00:53:33 +0000 2009'}">4:53 PM Nov 19th</span>
186
+ </a>
187
+ <span>from web</span>
188
+
189
+ </span>
190
+ </span>
191
+ </li>
192
+ <li class="hentry u-twitter status" id="status_5870049749">
193
+ <span class="status-body">
194
+ <span class="entry-content">Think globally, Tweet locally with geotagging. Opt-in! <a href="http://blog.twitter.com/2009/11/think-globally-tweet-locally.html" class="tweet-url web" rel="nofollow" target="_blank">http://blog.twitter.com/200...</a></span>
195
+ <span class="meta entry-meta">
196
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5870049749">
197
+ <span class="published timestamp" data="{time:'Thu Nov 19 21:54:41 +0000 2009'}">1:54 PM Nov 19th</span>
198
+ </a>
199
+ <span>from web</span>
200
+
201
+ </span>
202
+ </span>
203
+ </li>
204
+ <li class="hentry u-twitter status" id="status_5867297191">
205
+ <span class="status-body">
206
+ <span class="entry-content">RT @<a class="tweet-url username" href="/davewiner">davewiner</a>: The new Retweet is cool! (Scripting News) <a href="http://r2.ly/wvz4" class="tweet-url web" rel="nofollow" target="_blank">http://r2.ly/wvz4</a></span>
207
+ <span class="meta entry-meta">
208
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5867297191">
209
+ <span class="published timestamp" data="{time:'Thu Nov 19 20:09:04 +0000 2009'}">12:09 PM Nov 19th</span>
210
+ </a>
211
+ <span>from web</span>
212
+
213
+ </span>
214
+ </span>
215
+ </li>
216
+ <li class="hentry u-twitter status" id="status_5866757295">
217
+ <span class="status-body">
218
+ <span class="entry-content">What did the general say when he found out his army was going to lose the war? Retweet! (feature now available to all users)</span>
219
+ <span class="meta entry-meta">
220
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5866757295">
221
+ <span class="published timestamp" data="{time:'Thu Nov 19 19:48:01 +0000 2009'}">11:48 AM Nov 19th</span>
222
+ </a>
223
+ <span>from web</span>
224
+
225
+ </span>
226
+ </span>
227
+ </li>
228
+ <li class="hentry u-twitter status" id="status_5865461320">
229
+ <span class="status-body">
230
+ <span class="entry-content">Quick question: What's happening? <a href="http://blog.twitter.com/2009/11/whats-happening.html" class="tweet-url web" rel="nofollow" target="_blank">http://blog.twitter.com/200...</a></span>
231
+ <span class="meta entry-meta">
232
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5865461320">
233
+ <span class="published timestamp" data="{time:'Thu Nov 19 18:57:36 +0000 2009'}">10:57 AM Nov 19th</span>
234
+ </a>
235
+ <span>from web</span>
236
+
237
+ </span>
238
+ </span>
239
+ </li>
240
+ <li class="hentry u-twitter status" id="status_5838277342">
241
+ <span class="status-body">
242
+ <span class="entry-content">A picture is worth more than 140 characters. Shout out to @<a class="tweet-url username" href="/flickr">flickr</a> for their handy Flickr2Twitter app! <a href="/search?q=%23appwednesday" title="#appwednesday" class="tweet-url hashtag">#appwednesday</a></span>
243
+ <span class="meta entry-meta">
244
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5838277342">
245
+ <span class="published timestamp" data="{time:'Wed Nov 18 21:47:50 +0000 2009'}">1:47 PM Nov 18th</span>
246
+ </a>
247
+ <span>from web</span>
248
+
249
+ </span>
250
+ </span>
251
+ </li>
252
+ <li class="hentry u-twitter status" id="status_5812855148">
253
+ <span class="status-body">
254
+ <span class="entry-content">Wrong profile background pictures <a href="http://bit.ly/6lMhG" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/6lMhG</a></span>
255
+ <span class="meta entry-meta">
256
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5812855148">
257
+ <span class="published timestamp" data="{time:'Wed Nov 18 01:11:32 +0000 2009'}">5:11 PM Nov 17th</span>
258
+ </a>
259
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
260
+
261
+ </span>
262
+ </span>
263
+ </li>
264
+ <li class="hentry u-twitter status" id="status_5783517157">
265
+ <span class="status-body">
266
+ <span class="entry-content">Another first from the UK - MMS your pics to Twitter! <a href="http://bit.ly/5cm7R" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/5cm7R</a></span>
267
+ <span class="meta entry-meta">
268
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5783517157">
269
+ <span class="published timestamp" data="{time:'Tue Nov 17 02:01:27 +0000 2009'}">6:01 PM Nov 16th</span>
270
+ </a>
271
+ <span>from web</span>
272
+
273
+ </span>
274
+ </span>
275
+ </li>
276
+ <li class="hentry u-twitter status" id="status_5782562902">
277
+ <span class="status-body">
278
+ <span class="entry-content">Breaking in the new office with an awesome performance by @<a class="tweet-url username" href="/1republic">1republic</a>. Streaming live at <a href="http://bit.ly/4C8Hvv" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4C8Hvv</a>.</span>
279
+ <span class="meta entry-meta">
280
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5782562902">
281
+ <span class="published timestamp" data="{time:'Tue Nov 17 01:25:13 +0000 2009'}">5:25 PM Nov 16th</span>
282
+ </a>
283
+ <span>from web</span>
284
+
285
+ </span>
286
+ </span>
287
+ </li>
288
+ <li class="hentry u-twitter status" id="status_5781410369">
289
+ <span class="status-body">
290
+ <span class="entry-content">Maintenance window Tuesday, November 17th at 11p Pacific <a href="http://bit.ly/4AqkLs" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4AqkLs</a></span>
291
+ <span class="meta entry-meta">
292
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5781410369">
293
+ <span class="published timestamp" data="{time:'Tue Nov 17 00:41:43 +0000 2009'}">4:41 PM Nov 16th</span>
294
+ </a>
295
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
296
+
297
+ </span>
298
+ </span>
299
+ </li>
300
+ <li class="hentry u-twitter status" id="status_5778942639">
301
+ <span class="status-body">
302
+ <span class="entry-content">Official pics from the shiny new Twitter HQ! <a href="http://bit.ly/2oVk85" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/2oVk85</a></span>
303
+ <span class="meta entry-meta">
304
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5778942639">
305
+ <span class="published timestamp" data="{time:'Mon Nov 16 23:08:56 +0000 2009'}">3:08 PM Nov 16th</span>
306
+ </a>
307
+ <span>from web</span>
308
+
309
+ </span>
310
+ </span>
311
+ </li>
312
+ <li class="hentry u-twitter status" id="status_5772564273">
313
+ <span class="status-body">
314
+ <span class="entry-content">Feels like the 1st day of school at the new office! Here's one last tribute to the old HQ: <a href="http://bit.ly/1BkDvi" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/1BkDvi</a></span>
315
+ <span class="meta entry-meta">
316
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5772564273">
317
+ <span class="published timestamp" data="{time:'Mon Nov 16 18:56:52 +0000 2009'}">10:56 AM Nov 16th</span>
318
+ </a>
319
+ <span>from web</span>
320
+
321
+ </span>
322
+ </span>
323
+ </li>
324
+ <li class="hentry u-twitter status" id="status_5742847664">
325
+ <span class="status-body">
326
+ <span class="entry-content">Tweeps from 21 states and 5 countries gathered with @<a class="tweet-url username" href="/nasa">nasa</a> to tweet first-hand about the space shuttle launch tomorrow. <a href="http://bit.ly/nyOrn" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/nyOrn</a></span>
327
+ <span class="meta entry-meta">
328
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5742847664">
329
+ <span class="published timestamp" data="{time:'Sun Nov 15 19:05:27 +0000 2009'}">11:05 AM Nov 15th</span>
330
+ </a>
331
+ <span>from web</span>
332
+
333
+ </span>
334
+ </span>
335
+ </li>
336
+ <li class="hentry u-twitter status" id="status_5687484356">
337
+ <span class="status-body">
338
+ <span class="entry-content">Missing "in reply to" links <a href="http://bit.ly/3HIBNe" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/3HIBNe</a></span>
339
+ <span class="meta entry-meta">
340
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5687484356">
341
+ <span class="published timestamp" data="{time:'Fri Nov 13 18:52:05 +0000 2009'}">10:52 AM Nov 13th</span>
342
+ </a>
343
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
344
+
345
+ </span>
346
+ </span>
347
+ </li>
348
+ <li class="hentry u-twitter status" id="status_5686368225">
349
+ <span class="status-body">
350
+ <span class="entry-content">Lots to look forward to next week with the big move to our new headquarters. @<a class="tweet-url username" href="/sara">sara</a> has outdone herself on the interior design! <a href="/search?q=%23twitterhq" title="#twitterhq" class="tweet-url hashtag">#twitterhq</a></span>
351
+ <span class="meta entry-meta">
352
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5686368225">
353
+ <span class="published timestamp" data="{time:'Fri Nov 13 18:09:17 +0000 2009'}">10:09 AM Nov 13th</span>
354
+ </a>
355
+ <span>from web</span>
356
+
357
+ </span>
358
+ </span>
359
+ </li>
360
+ <li class="hentry u-twitter status" id="status_5632730783">
361
+ <span class="status-body">
362
+ <span class="entry-content">Retweet feature temporarily disabled <a href="http://bit.ly/TU4h3" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/TU4h3</a></span>
363
+ <span class="meta entry-meta">
364
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5632730783">
365
+ <span class="published timestamp" data="{time:'Wed Nov 11 23:18:48 +0000 2009'}">3:18 PM Nov 11th</span>
366
+ </a>
367
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
368
+
369
+ </span>
370
+ </span>
371
+ </li>
372
+ </ol> <div id="pagination">
373
+ <a href="/twitter?max_id=6191506635&amp;page=2&amp;twttr=true" class="round more" id="more" rel="next">more</a> </div>
374
+
375
+ </div>
376
+
377
+
378
+
379
+ </div>
380
+ </td>
381
+
382
+ <td id="side_base" class="column round-right">
383
+
384
+ <div id="side">
385
+
386
+ <div id="profile" class="section profile-side">
387
+ <span class="section-links">
388
+ </span>
389
+ <address>
390
+ <ul class="about vcard entry-author">
391
+
392
+
393
+
394
+ <li><span class="label">Name</span> <span class="fn">Twitter</span></li>
395
+ <li><span class="label">Location</span> <span class="adr">San Francisco, CA</span></li>
396
+ <li><span class="label">Web</span> <a href="http://twitter.com" class="url" rel="me nofollow" target="_blank">http://twitter.com</a></li>
397
+ <li id="bio"><span class="label">Bio</span> <span class="bio">Always wondering what's happening. </span></li>
398
+
399
+ </ul>
400
+ </address>
401
+
402
+
403
+
404
+ <div class="stats">
405
+ <table>
406
+ <tr>
407
+ <td>
408
+
409
+ <a href="/twitter/following" id="following_count_link" class="link-following_page" rel="me" title="See who twitter is following">
410
+ <span id="following_count" class="stats_count numeric">123 </span>
411
+ <span class="label">Following</span>
412
+ </a>
413
+
414
+ </td>
415
+ <td>
416
+
417
+ <a href="/twitter/followers" id="follower_count_link" class="link-followers_page" rel="me" title="See who's following twitter">
418
+ <span id="follower_count" class="stats_count numeric">2,700,999 </span>
419
+ <span class="label">Followers</span>
420
+ </a>
421
+
422
+ </td>
423
+ <td>
424
+
425
+ <a href="/twitter/lists/memberships" id="lists_count_link" class="link-lists_page" rel="me" title="See which lists twitter is on">
426
+ <span id="lists_count" class="stats_count numeric">12,392 </span>
427
+ <span class="label">Listed</span>
428
+ </a>
429
+
430
+ </td>
431
+ </tr>
432
+ </table>
433
+ </div>
434
+
435
+ </div>
436
+
437
+ <ul id="primary_nav" class="sidebar-menu">
438
+ <li id="profile_tab"><a href="/twitter" accesskey="u"><span id="update_count" class="stat_count">592</span><span>Tweets</span></a></li>
439
+ <li id="profile_favorites_tab"><a href="http://twitter.com/twitter/favorites" accesskey="f"><span>Favorites</span></a></li>
440
+ </ul>
441
+
442
+
443
+ <hr/>
444
+ <div id="side_lists">
445
+ <h2 class="sidebar-title"><span>Lists</span></h2>
446
+
447
+ <ul class="sidebar-menu lists-links">
448
+ <li><a href="/twitter/team" class="list_574" data="{&quot;dispatch_action&quot;:&quot;list&quot;,&quot;mode&quot;:&quot;public&quot;,&quot;description&quot;:&quot;&quot;,&quot;uri&quot;:&quot;/twitter/team&quot;,&quot;subscriber_count&quot;:60480,&quot;slug&quot;:&quot;team&quot;,&quot;full_name&quot;:&quot;@twitter/team&quot;,&quot;user&quot;:&quot;twitter&quot;,&quot;name&quot;:&quot;Team&quot;,&quot;id&quot;:574,&quot;member_count&quot;:124}" title="@twitter/Team"><span>@twitter/<wbr/><b>team</b></span></a></li>
449
+ </ul>
450
+ <p class="sidebar-menu sidebar-menu-actions">
451
+ <span class="view-all"><a href="http://twitter.com/twitter/lists">View all</a></span>
452
+ </p>
453
+ </div>
454
+
455
+
456
+ <hr/>
457
+
458
+
459
+ <div id="following">
460
+ <h2 class="sidebar-title" id="fm_menu"><span>Following</span></h2>
461
+
462
+ <div class="sidebar-menu">
463
+ <div id="following_list">
464
+
465
+ <span class="vcard">
466
+ <a href="/troyholden" class="url" hreflang="en" rel="contact" title="troyholden"><img alt="troyholden" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/471907441/4002551589_23daaeeca3_mini.jpg" width="24" /></a> </span>
467
+
468
+
469
+ <span class="vcard">
470
+ <a href="/twitter_fr" class="url" hreflang="en" rel="contact" title="Twitter Français"><img alt="Twitter Français" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/534477089/twitter_bird_profile_mini.png" width="24" /></a> </span>
471
+
472
+
473
+ <span class="vcard">
474
+ <a href="/jreichhold" class="url" hreflang="en" rel="contact" title="jreichhold"><img alt="jreichhold" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/54857067/Photo_1_mini.jpg" width="24" /></a> </span>
475
+
476
+
477
+ <span class="vcard">
478
+ <a href="/imownbey" class="url" hreflang="en" rel="contact" title="Ian Miles Ownbey"><img alt="Ian Miles Ownbey" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/64790496/Photo_2_mini.jpg" width="24" /></a> </span>
479
+
480
+
481
+ <span class="vcard">
482
+ <a href="/dongwang218" class="url" hreflang="en" rel="contact" title="Dong Wang"><img alt="Dong Wang" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/382072008/dong_mini.jpg" width="24" /></a> </span>
483
+
484
+
485
+ <span class="vcard">
486
+ <a href="/bsuto" class="url" hreflang="en" rel="contact" title="Brian Sutorius"><img alt="Brian Sutorius" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/452951844/Screen_shot_2009-10-04_at_3.39.37_PM_mini.png" width="24" /></a> </span>
487
+
488
+
489
+ <span class="vcard">
490
+ <a href="/BFF" class="url" hreflang="en" rel="contact" title="Brandi"><img alt="Brandi" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/511212714/care-bears_best-friends_mini.jpg" width="24" /></a> </span>
491
+
492
+
493
+ <span class="vcard">
494
+ <a href="/dino" class="url" hreflang="en" rel="contact" title="Dino"><img alt="Dino" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/508083749/Photo_2_mini.jpg" width="24" /></a> </span>
495
+
496
+
497
+ <span class="vcard">
498
+ <a href="/francesca" class="url" hreflang="en" rel="contact" title="Francesca"><img alt="Francesca" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/115081740/Singlephoto_mini.jpg" width="24" /></a> </span>
499
+
500
+
501
+ <span class="vcard">
502
+ <a href="/th" class="url" hreflang="en" rel="contact" title="taylor harwin"><img alt="taylor harwin" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/492316898/IMG_0058_mini.JPG" width="24" /></a> </span>
503
+
504
+
505
+ <span class="vcard">
506
+ <a href="/taylorharwin" class="url" hreflang="fr" rel="contact" title="Taylor Harwin"><img alt="Taylor Harwin" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/475719652/2847_626906472242_105819_36830072_2929508_n_mini.jpg" width="24" /></a> </span>
507
+
508
+
509
+ <span class="vcard">
510
+ <a href="/mischahere" class="url" hreflang="en" rel="contact" title="Mischa Nachtigal"><img alt="Mischa Nachtigal" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/435895728/waldo_mini.jpg" width="24" /></a> </span>
511
+
512
+
513
+ <span class="vcard">
514
+ <a href="/cayley" class="url" hreflang="en" rel="contact" title="Cayley Torgeson"><img alt="Cayley Torgeson" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/446923096/IMG_0072_mini.jpg" width="24" /></a> </span>
515
+
516
+
517
+ <span class="vcard">
518
+ <a href="/meetutkarsh" class="url" hreflang="en" rel="contact" title="Utkarsh Srivastava"><img alt="Utkarsh Srivastava" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/193389882/IMG_1906_mini.JPG" width="24" /></a> </span>
519
+
520
+
521
+ <span class="vcard">
522
+ <a href="/twitter_es" class="url" hreflang="en" rel="contact" title="Twitter Español"><img alt="Twitter Español" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/504883319/twitter_bird_profile_mini.png" width="24" /></a> </span>
523
+
524
+
525
+ <span class="vcard">
526
+ <a href="/rion" class="url" hreflang="en" rel="contact" title="Rion"><img alt="Rion" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/182109900/Photo_8_mini.jpg" width="24" /></a> </span>
527
+
528
+
529
+ <span class="vcard">
530
+ <a href="/nancyjconnery" class="url" hreflang="en" rel="contact" title="Nancy Connery"><img alt="Nancy Connery" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/93080012/Mommy_and_Morgan_mini.jpg" width="24" /></a> </span>
531
+
532
+
533
+ <span class="vcard">
534
+ <a href="/tamtam2" class="url" hreflang="en" rel="contact" title="tamtam2"><img alt="tamtam2" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/458987118/Taimay_Red_Outside_2_half_mini.jpg" width="24" /></a> </span>
535
+
536
+
537
+ <span class="vcard">
538
+ <a href="/em33" class="url" hreflang="en" rel="contact" title="Emee"><img alt="Emee" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/61480913/em_mini.PNG" width="24" /></a> </span>
539
+
540
+
541
+ <span class="vcard">
542
+ <a href="/andr8a" class="url" hreflang="en" rel="contact" title="andr8a"><img alt="andr8a" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/81960525/kitty_mini.jpg" width="24" /></a> </span>
543
+
544
+
545
+ <span class="vcard">
546
+ <a href="/keerthi" class="url" hreflang="en" rel="contact" title="Keerthi Prakash"><img alt="Keerthi Prakash" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/104888409/keerthi_2_2__mini.jpg" width="24" /></a> </span>
547
+
548
+
549
+ <span class="vcard">
550
+ <a href="/Lukester" class="url" hreflang="en" rel="contact" title="Luke "><img alt="Luke " class="photo fn" height="24" src="http://a1.twimg.com/profile_images/543545486/image_mini.jpg" width="24" /></a> </span>
551
+
552
+
553
+ <span class="vcard">
554
+ <a href="/sean" class="url" hreflang="ja" rel="contact" title="Sean"><img alt="Sean" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/468339255/645882267_8CRyH-L_mini.jpg" width="24" /></a> </span>
555
+
556
+
557
+ <span class="vcard">
558
+ <a href="/che" class="url" hreflang="en" rel="contact" title="Cheryl Palarca"><img alt="Cheryl Palarca" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/399472616/Picture_2a_mini.jpg" width="24" /></a> </span>
559
+
560
+
561
+ <span class="vcard">
562
+ <a href="/lg" class="url" hreflang="en" rel="contact" title="Larry Gadea"><img alt="Larry Gadea" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/53283340/q90406211_4464_mini.jpg" width="24" /></a> </span>
563
+
564
+
565
+ <span class="vcard">
566
+ <a href="/tiger" class="url" hreflang="en" rel="contact" title="Emily"><img alt="Emily" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/257293259/724px-Tigerente_mini.jpg" width="24" /></a> </span>
567
+
568
+
569
+ <span class="vcard">
570
+ <a href="/mattknox" class="url" hreflang="en" rel="contact" title="matt knox"><img alt="matt knox" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/257712871/n14602342_30779909_9881_mini.jpg" width="24" /></a> </span>
571
+
572
+
573
+ <span class="vcard">
574
+ <a href="/Magnuson" class="url" hreflang="en" rel="contact" title="Charles Magnuson"><img alt="Charles Magnuson" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/371840833/twitter_pic_mini.jpg" width="24" /></a> </span>
575
+
576
+
577
+ <span class="vcard">
578
+ <a href="/Charles" class="url" hreflang="en" rel="contact" title="Charles"><img alt="Charles" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/529789365/4099673652_248e60ccf5_mini.jpg" width="24" /></a> </span>
579
+
580
+
581
+ <span class="vcard">
582
+ <a href="/rsarver" class="url" hreflang="en" rel="contact" title="Ryan Sarver"><img alt="Ryan Sarver" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/53700173/2448434960_65aba38823_t_mini.jpg" width="24" /></a> </span>
583
+
584
+
585
+ <span class="vcard">
586
+ <a href="/ElizaSwan" class="url" hreflang="en" rel="contact" title="Robin"><img alt="Robin" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/260125397/icon_mini.png" width="24" /></a> </span>
587
+
588
+
589
+ <span class="vcard">
590
+ <a href="/sam" class="url" hreflang="en" rel="contact" title="Sam Luckenbill"><img alt="Sam Luckenbill" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/65334155/601879580_479440f611_mini.jpg" width="24" /></a> </span>
591
+
592
+
593
+ <span class="vcard">
594
+ <a href="/noradio" class="url" hreflang="en" rel="contact" title="Marcel Molina"><img alt="Marcel Molina" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/53473799/marcel-euro-rails-conf_mini.jpg" width="24" /></a> </span>
595
+
596
+
597
+ <span class="vcard">
598
+ <a href="/bakineggs" class="url" hreflang="en" rel="contact" title="Dan Barry"><img alt="Dan Barry" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/59318058/n30408690_33614537_9476_square_mini.jpg" width="24" /></a> </span>
599
+
600
+
601
+ <span class="vcard">
602
+ <a href="/emaland" class="url" hreflang="en" rel="contact" title="emaland"><img alt="emaland" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/68049269/n707915915_121_mini.jpg" width="24" /></a> </span>
603
+
604
+
605
+ <span class="vcard">
606
+ <a href="/ablegrape" class="url" hreflang="en" rel="contact" title="Doug Cook"><img alt="Doug Cook" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/76892852/sticker2_mini.jpg" width="24" /></a> </span>
607
+
608
+
609
+ </div>
610
+ <div id="friends_view_all">
611
+ <a href="/twitter/following" rel="me">View all&hellip;</a>
612
+ </div>
613
+
614
+ </div>
615
+
616
+
617
+ </div>
618
+
619
+
620
+
621
+
622
+ <div id="rssfeed">
623
+ <hr/>
624
+ <a href="/statuses/user_timeline/783214.rss" class="xref rss profile-rss" rel="alternate" type="application/rss+xml">RSS feed of twitter's tweets</a>
625
+ <a href="/favorites/783214.rss" class="xref rss favorites-rss" rel="alternate" type="application/rss+xml">RSS feed of twitter's favorites</a>
626
+ </div>
627
+
628
+
629
+ </div>
630
+ </td>
631
+
632
+ </tr>
633
+ </tbody>
634
+ </table>
635
+
636
+
637
+
638
+ <div id="footer"
639
+ class="round">
640
+ <h3 class="offscreen">Footer</h3>
641
+
642
+
643
+ <ul>
644
+ <li class="first">&copy; 2009 Twitter</li>
645
+ <li><a href="/about#about">About Us</a></li>
646
+ <li><a href="/about#contact">Contact</a></li>
647
+ <li><a href="http://blog.twitter.com">Blog</a></li>
648
+ <li><a href="http://status.twitter.com">Status</a></li>
649
+ <li><a href="/goodies">Goodies</a></li>
650
+ <li><a href="http://apiwiki.twitter.com/">API</a></li>
651
+ <li><a href="http://business.twitter.com/twitter101">Business</a></li>
652
+ <li><a href="http://help.twitter.com">Help</a></li>
653
+ <li><a href="/jobs">Jobs</a></li>
654
+ <li><a href="/tos">Terms</a></li>
655
+ <li><a href="/privacy">Privacy</a></li>
656
+ </ul>
657
+ </div>
658
+
659
+
660
+
661
+ <hr />
662
+
663
+ </div>
664
+
665
+
666
+
667
+ <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.0/jquery.min.js" type="text/javascript"></script>
668
+ <script src="http://a0.twimg.com/a/1259091217/javascripts/twitter.js?1259117012" type="text/javascript"></script>
669
+ <script src="http://a2.twimg.com/a/1259091217/javascripts/lib/jquery.tipsy.min.js?1259117012" type="text/javascript"></script>
670
+ <script type="text/javascript">
671
+ //<![CDATA[
672
+ page.user_screenname = 'twitter';
673
+ page.user_fullname = 'Twitter';
674
+ page.controller_name = 'AccountController';
675
+ page.action_name = 'profile';
676
+ twttr.form_authenticity_token = '7e394453cc9d849cab133ccb3180e8ae4e6a0258';
677
+ // FIXME: Reconcile with the kinds on the Status model.
678
+ twttr.statusKinds = {
679
+ UPDATE: 1,
680
+ SHARE: 2
681
+ };
682
+ twttr.ListPerUserLimit = 20;
683
+
684
+
685
+ //]]>
686
+ </script>
687
+ <script type="text/javascript">
688
+ //<![CDATA[
689
+
690
+ $( function () {
691
+ initializePage();
692
+
693
+ });
694
+
695
+ //]]>
696
+ </script>
697
+
698
+ <!-- BEGIN google analytics -->
699
+
700
+ <script type="text/javascript">
701
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
702
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
703
+ </script>
704
+
705
+ <script type="text/javascript">
706
+
707
+ try {
708
+ var pageTracker = _gat._getTracker("UA-30775-6");
709
+ pageTracker._setDomainName("twitter.com");
710
+ pageTracker._setVar('Not Logged In');
711
+ pageTracker._setVar('lang: en');
712
+ pageTracker._initData();
713
+ pageTracker._trackPageview('/profile/not_logged_in/twitter');
714
+ } catch(err) { }
715
+
716
+ </script>
717
+
718
+ <!-- END google analytics -->
719
+
720
+
721
+
722
+
723
+
724
+ <div id="notifications"></div>
725
+
726
+
727
+
728
+ <!-- ERB -->
729
+ </body>
730
+
731
+ </html>
@@ -0,0 +1,62 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
2
+
3
+ describe Scraper do
4
+ before :all do
5
+ @uri = File.expand_path(File.join(File.dirname(__FILE__), '..', 'data', 'twitter.html' ))
6
+ @pattern = ".status"
7
+ class Entry < Object
8
+ attr_accessor :text, :date
9
+ end
10
+ end
11
+
12
+ before :each do
13
+ Scraper.scrapers = {}
14
+ end
15
+
16
+ describe "defining a scraper" do
17
+ it "should create a new scraper using the specified name" do
18
+ Scraper.define(:s, :class => :entry, :source => @uri, :node => @pattern) {}
19
+
20
+ Scraper(:s).scraper_source.should == @uri
21
+ end
22
+
23
+ it "should be set with and without block" do
24
+ Scraper.define(:s, :class => :entry, :source => @uri, :node => @pattern) do |s|
25
+ s.text "foo"
26
+ s.date {"bar"}
27
+ end
28
+
29
+ @objects = Scraper.parse(:s)
30
+ @objects.first.text.should == "foo"
31
+ @objects.first.date.should == "bar"
32
+ end
33
+
34
+ it "should get the objects from twitter" do
35
+ Scraper.define(:twitter, :class => :entry, :source => @uri, :node => @pattern) do |s|
36
+ s.text { |node| node.at(".entry-content").inner_html }
37
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
38
+ end
39
+
40
+ @objects = Scraper.parse(:twitter)
41
+ @objects.size.should == 20
42
+ @objects.first.text.should == "SMS delivery issues on AT&T <a href=\"http://bit.ly/7JFJ6H\" class=\"tweet-url web\" rel=\"nofollow\" target=\"_blank\">http://bit.ly/7JFJ6H</a>"
43
+ @objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
44
+ end
45
+
46
+ it "should use a different html parser" do
47
+ require 'nokogiri'
48
+ Scraper.scrape_source_with = Proc.new { |source| Nokogiri::HTML(source) }
49
+
50
+ Scraper.define(:twitter, :class => :entry, :source => @uri, :node => @pattern) do |s|
51
+ s.text { |node| node.at(".entry-content").inner_html }
52
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
53
+ end
54
+
55
+ @objects = Scraper.parse(:twitter)
56
+ @objects.size.should == 20
57
+ @objects.first.text.should == "SMS delivery issues on AT&amp;T <a href=\"http://bit.ly/7JFJ6H\" class=\"tweet-url web\" rel=\"nofollow\" target=\"_blank\">http://bit.ly/7JFJ6H</a>"
58
+ @objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
59
+ end
60
+
61
+ end
62
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --format progress
2
+ --color
@@ -0,0 +1,7 @@
1
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
2
+ $: << File.join(File.dirname(__FILE__))
3
+
4
+ require 'rubygems'
5
+ require 'spec'
6
+ require 'spec/autorun'
7
+ require 'object-scraper'
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: object-scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Enrico Genauck
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-10 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.8.2
24
+ version:
25
+ description: Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.
26
+ email: kontakt@enricogenauck.de
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.rdoc
33
+ - lib/object-scraper.rb
34
+ - lib/object-scraper/scraper.rb
35
+ files:
36
+ - Manifest
37
+ - README.rdoc
38
+ - Rakefile
39
+ - lib/object-scraper.rb
40
+ - lib/object-scraper/scraper.rb
41
+ - object-scraper.gemspec
42
+ - spec/data/twitter.html
43
+ - spec/object-scraper/scraper_spec.rb
44
+ - spec/spec.opts
45
+ - spec/spec_helper.rb
46
+ has_rdoc: true
47
+ homepage: http://github.com/enricogenauck/object-scraper
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --line-numbers
53
+ - --inline-source
54
+ - --title
55
+ - Object-scraper
56
+ - --main
57
+ - README.rdoc
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "1.2"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project: object-scraper
75
+ rubygems_version: 1.3.5
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: Recipe-like object extraction from HTML sources
79
+ test_files: []
80
+