object-scraper 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest ADDED
@@ -0,0 +1,10 @@
1
+ Manifest
2
+ README.rdoc
3
+ Rakefile
4
+ lib/object-scraper.rb
5
+ lib/object-scraper/scraper.rb
6
+ object-scraper.gemspec
7
+ spec/data/twitter.html
8
+ spec/object-scraper/scraper_spec.rb
9
+ spec/spec.opts
10
+ spec/spec_helper.rb
data/README.rdoc ADDED
@@ -0,0 +1,53 @@
1
+ = Object Scraper
2
+
3
+ == Description
4
+
5
+ Object scraper is a thin wrapper for hpricot to enable recipe-like
6
+ extraction of ruby objects from various web sites.
7
+
8
+ == Install
9
+
10
+ === Gem
11
+
12
+ gem install object-scraper --source http://gemcutter.org
13
+
14
+ === Rails
15
+
16
+ config.gem 'object-scraper', :source => 'http://gemcutter.org'
17
+
18
+ == Example
19
+
20
+ class Entry < Object
21
+ attr_accessor :text, :date
22
+ end
23
+
24
+ uri = "http://twitter.com/twitter"
25
+ pattern = ".status"
26
+
27
+ Scraper.define(:twitter, :class => :entry, :source => uri, :node => pattern) do |s|
28
+ s.text { |node| node.at(".entry-content").inner_html }
29
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
30
+ end
31
+
32
+ @objects = Scraper.parse(:twitter)
33
+
34
+ == Advanced Example
35
+
36
+ It is possible to use other existing HTML parsers instead of hpricot.
37
+ Just override the corresponding proc object.
38
+
39
+ require 'nokogiri'
40
+ Scraper.scrape_source_with = Proc.new { |source| Nokogiri::HTML(source) }
41
+
42
+ Scraper.define(:twitter, :class => :entry, :source => uri, :node => pattern) do |s|
43
+ # initialize your objects here accordingly
44
+ end
45
+
46
+ == Rails
47
+
48
+ All scraper definitions sitting in RAILS_ROOT/scrapers will be taken into account
49
+ automatically when you use object-scraper as a gem in your rails project.
50
+
51
+ == Author
52
+
53
+ - Maintained by {Enrico Genauck}[mailto:kontakt@enricogenauck.de]
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
# Gem packaging configuration (name, version and metadata) handed to Echoe.
Echoe.new('object-scraper', '0.0.2') do |gem|
  gem.summary                  = "Recipe like object extraction from HTML sources"
  gem.description              = "Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites."
  gem.url                      = "http://github.com/enricogenauck/object-scraper"
  gem.author                   = "Enrico Genauck"
  gem.email                    = "kontakt@enricogenauck.de"
  gem.ignore_pattern           = ["tmp/*", "script/*"]
  gem.development_dependencies = []
  gem.runtime_dependencies     = ["hpricot >=0.8.2"]
end

# Load every additional rake task file found in tasks/, in sorted order.
task_files = Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort
task_files.each { |task_file| load task_file }
@@ -0,0 +1,128 @@
1
# Recipe-style extraction of plain Ruby objects from a parsed document.
#
# A scraper is registered under a name with Scraper.define and executed with
# Scraper.parse. For every node matching the :node pattern a new instance of
# :class is created and the definition block is called with the scraper
# itself; unknown methods called on the scraper are forwarded to the matching
# setter of the object currently being built:
#
#   Scraper.define(:twitter, :class => :entry, :source => uri, :node => ".status") do |s|
#     s.text { |node| node.at(".entry-content").inner_html }  # block form
#     s.origin "twitter"                                      # plain value form
#   end
class Scraper

  # Raised when a scraper is defined with the same name as a previously-defined scraper.
  class DuplicateDefinitionError < RuntimeError
  end

  # The option keys a definition must supply; no other keys are accepted.
  REQUIRED_OPTIONS = [:class, :source, :node].freeze

  # Sources matching this pattern ("scheme://...") are fetched through open-uri.
  URL_PATTERN = %r{\A[a-z][a-z0-9+.\-]*://}i

  class << self
    # Registered scrapers, keyed by their name as a Symbol.
    attr_accessor :scrapers
    # Callable that receives the opened source and returns a document
    # responding to #search (Hpricot by default).
    attr_accessor :scrape_source_with
    # Files (given without ".rb") and directories searched by find_definitions.
    attr_accessor :definition_file_paths
  end

  self.scrapers = {}
  self.scrape_source_with = Proc.new { |source| Hpricot(source) }
  self.definition_file_paths = %w(scrapers)

  attr_reader :scraper_source, :scraper_node

  # Registers a new scraper.
  #
  # name    - String or Symbol; stored as a Symbol so that it can be found
  #           again by Scraper.get / Scraper.parse, which look up by Symbol.
  # options - Hash with exactly the keys :class, :source and :node.
  # block   - called once per matched node with the scraper as argument.
  #
  # Returns the new Scraper instance.
  # Raises ArgumentError for unknown or missing options and
  # DuplicateDefinitionError when the name is already taken.
  def self.define(name, options = {}, &block)
    instance = Scraper.new(name, options, &block)
    key = name.to_sym

    if scrapers[key]
      raise DuplicateDefinitionError, "Scraper already defined: #{name}"
    end

    scrapers[key] = instance
  end

  def initialize(name, options = {}, &block) #:nodoc:
    assert_valid_options(options)
    @objects = []
    @class = class_for(options[:class])
    @scraper_source = options[:source]
    @scraper_node = options[:node]
    @block = block
  end

  # Returns the scraper registered under +name+.
  # Raises ArgumentError when there is none.
  def self.get(name)
    scraper_by_name(name)
  end

  # Runs the scraper registered under +name+ and returns the extracted objects.
  # Raises ArgumentError when there is no such scraper.
  def self.parse(name)
    scraper_by_name(name).parse
  end

  # Opens the source, converts it with Scraper.scrape_source_with and builds
  # one object per node returned by doc.search(scraper_node).
  #
  # The result list is rebuilt on every call, so parsing twice does not
  # return the objects of the first run again.
  #
  # Returns an Array of instances of the configured class.
  def parse
    doc = open_source { |io| Scraper.scrape_source_with.call(io) }

    @objects = []
    doc.search(@scraper_node).each do |node|
      @current_node = node
      @current_object = @class.new
      @objects << @current_object
      # A definition without a block simply yields untouched objects.
      @block.call(self) if @block
    end
    @objects
  end

  # Looks up a scraper by name (String or Symbol).
  # Raises ArgumentError when the name is unknown.
  def self.scraper_by_name(name)
    scrapers[name.to_sym] || raise(ArgumentError, "No such scraper: #{name.to_s}")
  end

  # Forwards +symbol+ to the "symbol=" setter of the object currently being
  # built. With a block the value is the block's result for the current node,
  # otherwise it is the first argument.
  def method_missing(symbol, *args, &block)
    value = block_given? ? yield(@current_node) : args.first
    @current_object.send("#{symbol}=", value)
  end

  # Keeps respond_to? in line with method_missing: true when the object
  # currently being built has a matching setter.
  def respond_to_missing?(symbol, include_private = false)
    (!@current_object.nil? && @current_object.respond_to?("#{symbol}=", include_private)) || super
  end

  # Requires every definition file: for each entry of definition_file_paths
  # the file "<entry>.rb" (if present) and all "*.rb" files directly inside
  # the directory "<entry>" (if present).
  def self.find_definitions
    definition_file_paths.each do |path|
      # Absolute paths are required because the current directory is not on
      # the load path of Ruby 1.9 and later.
      require(File.expand_path("#{path}.rb")) if File.exist?("#{path}.rb")

      if File.directory? path
        # Sorted so that the load order does not depend on the file system.
        Dir[File.join(path, '*.rb')].sort.each do |file|
          require File.expand_path(file)
        end
      end
    end
  end

  private

  # Opens the configured source and yields the resulting IO-like object,
  # returning the block's value.
  #
  # Objects that provide their own #open are asked to open themselves,
  # "scheme://" strings go through open-uri, and everything else is opened as
  # a local file. Local paths deliberately use File.open rather than
  # Kernel#open, so a source starting with "|" is never run as a command.
  def open_source(&block)
    source = @scraper_source

    if source.respond_to?(:open)
      source.open(&block)
    elsif source.to_s =~ URL_PATTERN
      require 'open-uri'
      if URI.respond_to?(:open)
        URI.open(source.to_s, &block)
      else
        # Older open-uri versions only patch Kernel#open.
        open(source.to_s, &block)
      end
    else
      File.open(source.to_s, &block)
    end
  end

  # Resolves the :class option: names (anything responding to to_sym, e.g.
  # :entry or "blog/post") are converted to a constant, anything else is
  # returned unchanged.
  def class_for(class_or_to_s)
    return class_or_to_s unless class_or_to_s.respond_to?(:to_sym)

    # Walk the namespace one constant at a time; a "A::B" path passed to a
    # single const_get is not supported by every Ruby version.
    parts = variable_name_to_class_name(class_or_to_s).split('::').reject { |part| part.empty? }
    parts.inject(Object) { |namespace, const_name| namespace.const_get(const_name) }
  end

  # Returns a Symbol name for a scraper, derived from a name or from a class.
  def scraper_name_for(class_or_to_s)
    if class_or_to_s.respond_to?(:to_sym)
      class_or_to_s.to_sym
    else
      class_name_to_variable_name(class_or_to_s).to_sym
    end
  end

  # "Blog::PostEntry" -> "blog/post_entry"
  def class_name_to_variable_name(name)
    name.to_s.gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
      gsub(/([a-z\d])([A-Z])/,'\1_\2').
      tr("-", "_").
      downcase
  end

  # "blog/post_entry" -> "Blog::PostEntry"
  def variable_name_to_class_name(name)
    name.to_s.
      gsub(/\/(.?)/) { "::#{$1.upcase}" }.
      gsub(/(?:^|_)(.)/) { $1.upcase }
  end

  # Raises ArgumentError unless +options+ contains exactly the required keys,
  # each with a truthy value.
  def assert_valid_options(options)
    invalid_keys = options.keys - REQUIRED_OPTIONS
    unless invalid_keys.empty?
      raise ArgumentError, "Unknown arguments: #{invalid_keys.inspect}"
    end

    REQUIRED_OPTIONS.each do |key|
      raise ArgumentError, "Missing argument: #{key.inspect}" unless options[key]
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'object-scraper/scraper'
2
+ require 'open-uri'
3
+ require 'hpricot'
4
+
5
# Convenience lookup of a registered scraper; same as Scraper.get(name).
#
# Example:
#   Scraper(:my_space)
def Scraper(name)
  Scraper.get(name)
end
12
+
13
# Load the scraper definitions. Inside a Rails application this is deferred
# until the application is initialized and only <app root>/scrapers is
# searched; everywhere else the default definition paths are searched
# immediately.
if defined? Rails.configuration
  Rails.configuration.after_initialize do
    # Prefer Rails.root; the RAILS_ROOT constant only exists in old Rails
    # versions and is kept as a fallback for them.
    rails_root = Rails.respond_to?(:root) ? Rails.root.to_s : RAILS_ROOT
    Scraper.definition_file_paths = [File.join(rails_root, 'scrapers')]
    Scraper.find_definitions
  end
else
  Scraper.find_definitions
end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for object-scraper 0.0.2: metadata, packaged files and the
# runtime dependency on hpricot.
Gem::Specification.new do |s|
  s.name = %q{object-scraper}
  s.version = "0.0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Enrico Genauck"]
  s.date = %q{2009-12-10}
  s.description = %q{Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.}
  s.email = %q{kontakt@enricogenauck.de}
  s.extra_rdoc_files = ["README.rdoc", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb"]
  s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb", "object-scraper.gemspec", "spec/data/twitter.html", "spec/object-scraper/scraper_spec.rb", "spec/spec.opts", "spec/spec_helper.rb"]
  s.homepage = %q{http://github.com/enricogenauck/object-scraper}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Object-scraper", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Guarded: only set when this RubyGems version still has the attribute.
  s.rubyforge_project = %q{object-scraper} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Recipe like object extraction from HTML sources}

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion is the legacy name of Gem::VERSION; use whichever
    # constant the running RubyGems defines.
    running_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(running_rubygems) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<hpricot>, [">= 0.8.2"])
    else
      s.add_dependency(%q<hpricot>, [">= 0.8.2"])
    end
  else
    s.add_dependency(%q<hpricot>, [">= 0.8.2"])
  end
end
@@ -0,0 +1,731 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
+ <head>
4
+ <script type="text/javascript">
5
+ //<![CDATA[
6
+ var page={};var onCondition=function(D,C,A,B){D=D;A=A?Math.min(A,5):5;B=B||100;if(D()){C()}else{if(A>1){setTimeout(function(){onCondition(D,C,A-1,B)},B)}}};
7
+ //]]>
8
+ </script>
9
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
10
+ <meta content="en-us" http-equiv="Content-Language" />
11
+ <meta content="Always wondering what's happening. " name="description" />
12
+ <meta content="no" http-equiv="imagetoolbar" />
13
+ <meta content="width = 780" name="viewport" />
14
+ <meta content="4FTTxY4uvo0RZTMQqIyhh18HsepyJOctQ+XTOu1zsfE=" name="verify-v1" />
15
+ <meta content="1" name="page" />
16
+ <meta content="NOODP" name="robots" />
17
+ <meta content="n" name="session-loggedin" />
18
+ <meta content="twitter" name="page-user-screen_name" />
19
+ <title id="page_title">Twitter (twitter) on Twitter</title>
20
+ <link href="http://a1.twimg.com/a/1259091217/images/favicon.ico" rel="shortcut icon" type="image/x-icon" />
21
+ <link href="http://a1.twimg.com/a/1259091217/images/twitter_57.png" rel="apple-touch-icon" />
22
+ <link rel="alternate" href="http://twitter.com/statuses/user_timeline/783214.rss" title="twitter's Tweets" type="application/rss+xml" />
23
+ <link rel="alternate" href="http://twitter.com/favorites/783214.rss" title="twitter's Favorites" type="application/rss+xml" />
24
+
25
+
26
+ <link href="http://a1.twimg.com/a/1259091217/stylesheets/twitter.css?1259117012" media="screen" rel="stylesheet" type="text/css" />
27
+ <style type="text/css">
28
+
29
+ body { background: #C0DEED url('http://a3.twimg.com/a/1259091217/images/bg-clouds.png') repeat-x; }
30
+
31
+
32
+ body {
33
+ background-position: 0 0;
34
+ padding-top: 0;
35
+ }
36
+
37
+ </style>
38
+ <link href="http://a0.twimg.com/a/1259091217/stylesheets/following.css?1259117012" media="screen, projection" rel="stylesheet" type="text/css" />
39
+
40
+ </head>
41
+
42
+ <body class="account safari" id="profile">
43
+ <script type="text/javascript">
44
+ //<![CDATA[
45
+ if (window.top !== window.self) {document.write = "";window.top.location = window.self.location; setTimeout(function(){document.body.innerHTML='';},1);window.self.onload=function(evt){document.body.innerHTML='';};}
46
+ //]]>
47
+ </script>
48
+
49
+
50
+ <div id="dim-screen"></div>
51
+ <ul id="accessibility" class="offscreen">
52
+ <li><a href="#content" accesskey="0">Skip past navigation</a></li>
53
+ <li>On a mobile phone? Check out <a href="http://m.twitter.com/">m.twitter.com</a>!</li>
54
+ <li><a href="#footer" accesskey="2">Skip to navigation</a></li>
55
+ <li><a href="#signin">Skip to sign in form</a></li>
56
+ </ul>
57
+
58
+
59
+
60
+ <div id="container" class="subpage">
61
+ <span id="loader" style="display:none"><img alt="Loader" src="http://a0.twimg.com/a/1259091217/images/loader.gif" /></span>
62
+ <div id="header">
63
+ <a href="http://twitter.com/" title="Twitter / Home" accesskey="1" id="logo">
64
+ <img alt="Twitter.com" height="36" src="http://a0.twimg.com/a/1259091217/images/twitter_logo_header.png" width="155" />
65
+ </a>
66
+ <form method="post" id="sign_out_form" action="/sessions/destroy" style="display:none;">
67
+ <input name="authenticity_token" value="7e394453cc9d849cab133ccb3180e8ae4e6a0258" type="hidden" />
68
+ </form>
69
+
70
+ <ul class="top-navigation round">
71
+ <li><a href="/login" accesskey="l">Login</a></li>
72
+ <li class="signup-link"><a href="/signup">Join Twitter!</a></li>
73
+ </ul>
74
+ </div>
75
+
76
+
77
+
78
+ <div id="profilebox_outer">
79
+ <div id="profilebird"><img alt="Profile_bird" height="48" id="profilebirdimg" src="http://a0.twimg.com/a/1259091217/images/profile_bird.png" width="48" /></div>
80
+ <div id="profilebox" class="clearfix">
81
+ <div id="profiletext">
82
+ <h1>Hey there! <strong>twitter</strong> is using Twitter.</h1>
83
+ <h2>Twitter is a free service that lets you keep in touch with people through the exchange of quick, frequent answers to one simple question: What's happening? <strong>Join today</strong> to start receiving <strong>twitter's</strong> tweets.</h2>
84
+ </div>
85
+ <div id="profilebutton">
86
+ <form name="account_signup_form" id="account_signup_form" action="/signup">
87
+ <input id="follow" name="follow" type="hidden" value="twitter" />
88
+ <input class="profilesubmit" id="join" name="commit" type="submit" value="Join today!" />
89
+ </form>
90
+ <p><small>Already using Twitter<br /> from your phone? <a href="/account/complete">Click here</a>.</small></p>
91
+ </div>
92
+ </div>
93
+ </div>
94
+
95
+
96
+
97
+
98
+
99
+ <div class="content-bubble-arrow"></div>
100
+
101
+
102
+ <table cellspacing="0" class="columns">
103
+ <tbody>
104
+ <tr>
105
+ <td id="content" class="round-left column">
106
+ <div class="wrapper">
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+ <div class="profile-user">
117
+ <div id="user_783214" class="user ">
118
+ <h2 class="thumb clearfix">
119
+ <a href="/account/profile_image/twitter?hreflang=en"><img alt="" border="0" height="73" id="profile-image" src="http://a1.twimg.com/profile_images/75075164/twitter_bird_profile_bigger.png" valign="middle" width="73" /></a>
120
+ <div class="screen-name">twitter</div>
121
+ </h2>
122
+ </div>
123
+ </div>
124
+
125
+
126
+ <div class="section">
127
+
128
+ <div id="timeline_heading" style="display: none;">
129
+ <h1 id="heading"></h1>
130
+ </div>
131
+ <ol id='timeline' class='statuses'>
132
+ <li class="hentry u-twitter status latest-status" id="status_6191506635">
133
+ <span class="status-body">
134
+ <span class="entry-content">SMS delivery issues on AT&T <a href="http://bit.ly/7JFJ6H" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/7JFJ6H</a></span>
135
+ <span class="meta entry-meta">
136
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/6191506635">
137
+ <span class="published timestamp" data="{time:'Mon Nov 30 04:10:51 +0000 2009'}">about 8 hours ago</span>
138
+ </a>
139
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
140
+
141
+ </span>
142
+ </span>
143
+ </li>
144
+ <li class="hentry u-twitter status" id="status_5989297065">
145
+ <span class="status-body">
146
+ <span class="entry-content">Fixing elevated error rate on twitter.com <a href="http://bit.ly/4xRf8U" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4xRf8U</a></span>
147
+ <span class="meta entry-meta">
148
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5989297065">
149
+ <span class="published timestamp" data="{time:'Mon Nov 23 22:13:27 +0000 2009'}">2:13 PM Nov 23rd</span>
150
+ </a>
151
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
152
+
153
+ </span>
154
+ </span>
155
+ </li>
156
+ <li class="hentry u-twitter status" id="status_5875860574">
157
+ <span class="status-body">
158
+ <span class="entry-content">Abonnez-vous à @<a class="tweet-url username" href="/Twitter_FR">Twitter_FR</a>, le compte officiel de Twitter en français!</span>
159
+ <span class="meta entry-meta">
160
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5875860574">
161
+ <span class="published timestamp" data="{time:'Fri Nov 20 01:34:31 +0000 2009'}">5:34 PM Nov 19th</span>
162
+ </a>
163
+ <span>from web</span>
164
+
165
+ </span>
166
+ </span>
167
+ </li>
168
+ <li class="hentry u-twitter status" id="status_5875627626">
169
+ <span class="status-body">
170
+ <span class="entry-content">Nouvelle saveur : Twitter en Français! <a href="http://bit.ly/DeCHQ" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/DeCHQ</a></span>
171
+ <span class="meta entry-meta">
172
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5875627626">
173
+ <span class="published timestamp" data="{time:'Fri Nov 20 01:25:37 +0000 2009'}">5:25 PM Nov 19th</span>
174
+ </a>
175
+ <span>from web</span>
176
+
177
+ </span>
178
+ </span>
179
+ </li>
180
+ <li class="hentry u-twitter status" id="status_5874789939">
181
+ <span class="status-body">
182
+ <span class="entry-content">RT @<a class="tweet-url username" href="/macgill">macgill</a>: Released refreshed Twitter privacy policy <a href="http://bit.ly/kYyQ6" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/kYyQ6</a> and a new helpful trademark page <a href="http://bit.ly/2iGZgV" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/2iGZgV</a> Check </span>
183
+ <a href="http://twitter.com/twitter/status/5874789939">...</a> <span class="meta entry-meta">
184
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5874789939">
185
+ <span class="published timestamp" data="{time:'Fri Nov 20 00:53:33 +0000 2009'}">4:53 PM Nov 19th</span>
186
+ </a>
187
+ <span>from web</span>
188
+
189
+ </span>
190
+ </span>
191
+ </li>
192
+ <li class="hentry u-twitter status" id="status_5870049749">
193
+ <span class="status-body">
194
+ <span class="entry-content">Think globally, Tweet locally with geotagging. Opt-in! <a href="http://blog.twitter.com/2009/11/think-globally-tweet-locally.html" class="tweet-url web" rel="nofollow" target="_blank">http://blog.twitter.com/200...</a></span>
195
+ <span class="meta entry-meta">
196
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5870049749">
197
+ <span class="published timestamp" data="{time:'Thu Nov 19 21:54:41 +0000 2009'}">1:54 PM Nov 19th</span>
198
+ </a>
199
+ <span>from web</span>
200
+
201
+ </span>
202
+ </span>
203
+ </li>
204
+ <li class="hentry u-twitter status" id="status_5867297191">
205
+ <span class="status-body">
206
+ <span class="entry-content">RT @<a class="tweet-url username" href="/davewiner">davewiner</a>: The new Retweet is cool! (Scripting News) <a href="http://r2.ly/wvz4" class="tweet-url web" rel="nofollow" target="_blank">http://r2.ly/wvz4</a></span>
207
+ <span class="meta entry-meta">
208
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5867297191">
209
+ <span class="published timestamp" data="{time:'Thu Nov 19 20:09:04 +0000 2009'}">12:09 PM Nov 19th</span>
210
+ </a>
211
+ <span>from web</span>
212
+
213
+ </span>
214
+ </span>
215
+ </li>
216
+ <li class="hentry u-twitter status" id="status_5866757295">
217
+ <span class="status-body">
218
+ <span class="entry-content">What did the general say when he found out his army was going to lose the war? Retweet! (feature now available to all users)</span>
219
+ <span class="meta entry-meta">
220
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5866757295">
221
+ <span class="published timestamp" data="{time:'Thu Nov 19 19:48:01 +0000 2009'}">11:48 AM Nov 19th</span>
222
+ </a>
223
+ <span>from web</span>
224
+
225
+ </span>
226
+ </span>
227
+ </li>
228
+ <li class="hentry u-twitter status" id="status_5865461320">
229
+ <span class="status-body">
230
+ <span class="entry-content">Quick question: What's happening? <a href="http://blog.twitter.com/2009/11/whats-happening.html" class="tweet-url web" rel="nofollow" target="_blank">http://blog.twitter.com/200...</a></span>
231
+ <span class="meta entry-meta">
232
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5865461320">
233
+ <span class="published timestamp" data="{time:'Thu Nov 19 18:57:36 +0000 2009'}">10:57 AM Nov 19th</span>
234
+ </a>
235
+ <span>from web</span>
236
+
237
+ </span>
238
+ </span>
239
+ </li>
240
+ <li class="hentry u-twitter status" id="status_5838277342">
241
+ <span class="status-body">
242
+ <span class="entry-content">A picture is worth more than 140 characters. Shout out to @<a class="tweet-url username" href="/flickr">flickr</a> for their handy Flickr2Twitter app! <a href="/search?q=%23appwednesday" title="#appwednesday" class="tweet-url hashtag">#appwednesday</a></span>
243
+ <span class="meta entry-meta">
244
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5838277342">
245
+ <span class="published timestamp" data="{time:'Wed Nov 18 21:47:50 +0000 2009'}">1:47 PM Nov 18th</span>
246
+ </a>
247
+ <span>from web</span>
248
+
249
+ </span>
250
+ </span>
251
+ </li>
252
+ <li class="hentry u-twitter status" id="status_5812855148">
253
+ <span class="status-body">
254
+ <span class="entry-content">Wrong profile background pictures <a href="http://bit.ly/6lMhG" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/6lMhG</a></span>
255
+ <span class="meta entry-meta">
256
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5812855148">
257
+ <span class="published timestamp" data="{time:'Wed Nov 18 01:11:32 +0000 2009'}">5:11 PM Nov 17th</span>
258
+ </a>
259
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
260
+
261
+ </span>
262
+ </span>
263
+ </li>
264
+ <li class="hentry u-twitter status" id="status_5783517157">
265
+ <span class="status-body">
266
+ <span class="entry-content">Another first from the UK - MMS your pics to Twitter! <a href="http://bit.ly/5cm7R" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/5cm7R</a></span>
267
+ <span class="meta entry-meta">
268
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5783517157">
269
+ <span class="published timestamp" data="{time:'Tue Nov 17 02:01:27 +0000 2009'}">6:01 PM Nov 16th</span>
270
+ </a>
271
+ <span>from web</span>
272
+
273
+ </span>
274
+ </span>
275
+ </li>
276
+ <li class="hentry u-twitter status" id="status_5782562902">
277
+ <span class="status-body">
278
+ <span class="entry-content">Breaking in the new office with an awesome performance by @<a class="tweet-url username" href="/1republic">1republic</a>. Streaming live at <a href="http://bit.ly/4C8Hvv" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4C8Hvv</a>.</span>
279
+ <span class="meta entry-meta">
280
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5782562902">
281
+ <span class="published timestamp" data="{time:'Tue Nov 17 01:25:13 +0000 2009'}">5:25 PM Nov 16th</span>
282
+ </a>
283
+ <span>from web</span>
284
+
285
+ </span>
286
+ </span>
287
+ </li>
288
+ <li class="hentry u-twitter status" id="status_5781410369">
289
+ <span class="status-body">
290
+ <span class="entry-content">Maintenance window Tuesday, November 17th at 11p Pacific <a href="http://bit.ly/4AqkLs" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4AqkLs</a></span>
291
+ <span class="meta entry-meta">
292
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5781410369">
293
+ <span class="published timestamp" data="{time:'Tue Nov 17 00:41:43 +0000 2009'}">4:41 PM Nov 16th</span>
294
+ </a>
295
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
296
+
297
+ </span>
298
+ </span>
299
+ </li>
300
+ <li class="hentry u-twitter status" id="status_5778942639">
301
+ <span class="status-body">
302
+ <span class="entry-content">Official pics from the shiny new Twitter HQ! <a href="http://bit.ly/2oVk85" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/2oVk85</a></span>
303
+ <span class="meta entry-meta">
304
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5778942639">
305
+ <span class="published timestamp" data="{time:'Mon Nov 16 23:08:56 +0000 2009'}">3:08 PM Nov 16th</span>
306
+ </a>
307
+ <span>from web</span>
308
+
309
+ </span>
310
+ </span>
311
+ </li>
312
+ <li class="hentry u-twitter status" id="status_5772564273">
313
+ <span class="status-body">
314
+ <span class="entry-content">Feels like the 1st day of school at the new office! Here's one last tribute to the old HQ: <a href="http://bit.ly/1BkDvi" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/1BkDvi</a></span>
315
+ <span class="meta entry-meta">
316
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5772564273">
317
+ <span class="published timestamp" data="{time:'Mon Nov 16 18:56:52 +0000 2009'}">10:56 AM Nov 16th</span>
318
+ </a>
319
+ <span>from web</span>
320
+
321
+ </span>
322
+ </span>
323
+ </li>
324
+ <li class="hentry u-twitter status" id="status_5742847664">
325
+ <span class="status-body">
326
+ <span class="entry-content">Tweeps from 21 states and 5 countries gathered with @<a class="tweet-url username" href="/nasa">nasa</a> to tweet first-hand about the space shuttle launch tomorrow. <a href="http://bit.ly/nyOrn" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/nyOrn</a></span>
327
+ <span class="meta entry-meta">
328
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5742847664">
329
+ <span class="published timestamp" data="{time:'Sun Nov 15 19:05:27 +0000 2009'}">11:05 AM Nov 15th</span>
330
+ </a>
331
+ <span>from web</span>
332
+
333
+ </span>
334
+ </span>
335
+ </li>
336
+ <li class="hentry u-twitter status" id="status_5687484356">
337
+ <span class="status-body">
338
+ <span class="entry-content">Missing "in reply to" links <a href="http://bit.ly/3HIBNe" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/3HIBNe</a></span>
339
+ <span class="meta entry-meta">
340
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5687484356">
341
+ <span class="published timestamp" data="{time:'Fri Nov 13 18:52:05 +0000 2009'}">10:52 AM Nov 13th</span>
342
+ </a>
343
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
344
+
345
+ </span>
346
+ </span>
347
+ </li>
348
+ <li class="hentry u-twitter status" id="status_5686368225">
349
+ <span class="status-body">
350
+ <span class="entry-content">Lots to look forward to next week with the big move to our new headquarters. @<a class="tweet-url username" href="/sara">sara</a> has outdone herself on the interior design! <a href="/search?q=%23twitterhq" title="#twitterhq" class="tweet-url hashtag">#twitterhq</a></span>
351
+ <span class="meta entry-meta">
352
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5686368225">
353
+ <span class="published timestamp" data="{time:'Fri Nov 13 18:09:17 +0000 2009'}">10:09 AM Nov 13th</span>
354
+ </a>
355
+ <span>from web</span>
356
+
357
+ </span>
358
+ </span>
359
+ </li>
360
+ <li class="hentry u-twitter status" id="status_5632730783">
361
+ <span class="status-body">
362
+ <span class="entry-content">Retweet feature temporarily disabled <a href="http://bit.ly/TU4h3" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/TU4h3</a></span>
363
+ <span class="meta entry-meta">
364
+ <a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5632730783">
365
+ <span class="published timestamp" data="{time:'Wed Nov 11 23:18:48 +0000 2009'}">3:18 PM Nov 11th</span>
366
+ </a>
367
+ <span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
368
+
369
+ </span>
370
+ </span>
371
+ </li>
372
+ </ol> <div id="pagination">
373
+ <a href="/twitter?max_id=6191506635&amp;page=2&amp;twttr=true" class="round more" id="more" rel="next">more</a> </div>
374
+
375
+ </div>
376
+
377
+
378
+
379
+ </div>
380
+ </td>
381
+
382
+ <td id="side_base" class="column round-right">
383
+
384
+ <div id="side">
385
+
386
+ <div id="profile" class="section profile-side">
387
+ <span class="section-links">
388
+ </span>
389
+ <address>
390
+ <ul class="about vcard entry-author">
391
+
392
+
393
+
394
+ <li><span class="label">Name</span> <span class="fn">Twitter</span></li>
395
+ <li><span class="label">Location</span> <span class="adr">San Francisco, CA</span></li>
396
+ <li><span class="label">Web</span> <a href="http://twitter.com" class="url" rel="me nofollow" target="_blank">http://twitter.com</a></li>
397
+ <li id="bio"><span class="label">Bio</span> <span class="bio">Always wondering what's happening. </span></li>
398
+
399
+ </ul>
400
+ </address>
401
+
402
+
403
+
404
+ <div class="stats">
405
+ <table>
406
+ <tr>
407
+ <td>
408
+
409
+ <a href="/twitter/following" id="following_count_link" class="link-following_page" rel="me" title="See who twitter is following">
410
+ <span id="following_count" class="stats_count numeric">123 </span>
411
+ <span class="label">Following</span>
412
+ </a>
413
+
414
+ </td>
415
+ <td>
416
+
417
+ <a href="/twitter/followers" id="follower_count_link" class="link-followers_page" rel="me" title="See who's following twitter">
418
+ <span id="follower_count" class="stats_count numeric">2,700,999 </span>
419
+ <span class="label">Followers</span>
420
+ </a>
421
+
422
+ </td>
423
+ <td>
424
+
425
+ <a href="/twitter/lists/memberships" id="lists_count_link" class="link-lists_page" rel="me" title="See which lists twitter is on">
426
+ <span id="lists_count" class="stats_count numeric">12,392 </span>
427
+ <span class="label">Listed</span>
428
+ </a>
429
+
430
+ </td>
431
+ </tr>
432
+ </table>
433
+ </div>
434
+
435
+ </div>
436
+
437
+ <ul id="primary_nav" class="sidebar-menu">
438
+ <li id="profile_tab"><a href="/twitter" accesskey="u"><span id="update_count" class="stat_count">592</span><span>Tweets</span></a></li>
439
+ <li id="profile_favorites_tab"><a href="http://twitter.com/twitter/favorites" accesskey="f"><span>Favorites</span></a></li>
440
+ </ul>
441
+
442
+
443
+ <hr/>
444
+ <div id="side_lists">
445
+ <h2 class="sidebar-title"><span>Lists</span></h2>
446
+
447
+ <ul class="sidebar-menu lists-links">
448
+ <li><a href="/twitter/team" class="list_574" data="{&quot;dispatch_action&quot;:&quot;list&quot;,&quot;mode&quot;:&quot;public&quot;,&quot;description&quot;:&quot;&quot;,&quot;uri&quot;:&quot;/twitter/team&quot;,&quot;subscriber_count&quot;:60480,&quot;slug&quot;:&quot;team&quot;,&quot;full_name&quot;:&quot;@twitter/team&quot;,&quot;user&quot;:&quot;twitter&quot;,&quot;name&quot;:&quot;Team&quot;,&quot;id&quot;:574,&quot;member_count&quot;:124}" title="@twitter/Team"><span>@twitter/<wbr/><b>team</b></span></a></li>
449
+ </ul>
450
+ <p class="sidebar-menu sidebar-menu-actions">
451
+ <span class="view-all"><a href="http://twitter.com/twitter/lists">View all</a></span>
452
+ </p>
453
+ </div>
454
+
455
+
456
+ <hr/>
457
+
458
+
459
+ <div id="following">
460
+ <h2 class="sidebar-title" id="fm_menu"><span>Following</span></h2>
461
+
462
+ <div class="sidebar-menu">
463
+ <div id="following_list">
464
+
465
+ <span class="vcard">
466
+ <a href="/troyholden" class="url" hreflang="en" rel="contact" title="troyholden"><img alt="troyholden" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/471907441/4002551589_23daaeeca3_mini.jpg" width="24" /></a> </span>
467
+
468
+
469
+ <span class="vcard">
470
+ <a href="/twitter_fr" class="url" hreflang="en" rel="contact" title="Twitter Français"><img alt="Twitter Français" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/534477089/twitter_bird_profile_mini.png" width="24" /></a> </span>
471
+
472
+
473
+ <span class="vcard">
474
+ <a href="/jreichhold" class="url" hreflang="en" rel="contact" title="jreichhold"><img alt="jreichhold" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/54857067/Photo_1_mini.jpg" width="24" /></a> </span>
475
+
476
+
477
+ <span class="vcard">
478
+ <a href="/imownbey" class="url" hreflang="en" rel="contact" title="Ian Miles Ownbey"><img alt="Ian Miles Ownbey" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/64790496/Photo_2_mini.jpg" width="24" /></a> </span>
479
+
480
+
481
+ <span class="vcard">
482
+ <a href="/dongwang218" class="url" hreflang="en" rel="contact" title="Dong Wang"><img alt="Dong Wang" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/382072008/dong_mini.jpg" width="24" /></a> </span>
483
+
484
+
485
+ <span class="vcard">
486
+ <a href="/bsuto" class="url" hreflang="en" rel="contact" title="Brian Sutorius"><img alt="Brian Sutorius" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/452951844/Screen_shot_2009-10-04_at_3.39.37_PM_mini.png" width="24" /></a> </span>
487
+
488
+
489
+ <span class="vcard">
490
+ <a href="/BFF" class="url" hreflang="en" rel="contact" title="Brandi"><img alt="Brandi" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/511212714/care-bears_best-friends_mini.jpg" width="24" /></a> </span>
491
+
492
+
493
+ <span class="vcard">
494
+ <a href="/dino" class="url" hreflang="en" rel="contact" title="Dino"><img alt="Dino" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/508083749/Photo_2_mini.jpg" width="24" /></a> </span>
495
+
496
+
497
+ <span class="vcard">
498
+ <a href="/francesca" class="url" hreflang="en" rel="contact" title="Francesca"><img alt="Francesca" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/115081740/Singlephoto_mini.jpg" width="24" /></a> </span>
499
+
500
+
501
+ <span class="vcard">
502
+ <a href="/th" class="url" hreflang="en" rel="contact" title="taylor harwin"><img alt="taylor harwin" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/492316898/IMG_0058_mini.JPG" width="24" /></a> </span>
503
+
504
+
505
+ <span class="vcard">
506
+ <a href="/taylorharwin" class="url" hreflang="fr" rel="contact" title="Taylor Harwin"><img alt="Taylor Harwin" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/475719652/2847_626906472242_105819_36830072_2929508_n_mini.jpg" width="24" /></a> </span>
507
+
508
+
509
+ <span class="vcard">
510
+ <a href="/mischahere" class="url" hreflang="en" rel="contact" title="Mischa Nachtigal"><img alt="Mischa Nachtigal" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/435895728/waldo_mini.jpg" width="24" /></a> </span>
511
+
512
+
513
+ <span class="vcard">
514
+ <a href="/cayley" class="url" hreflang="en" rel="contact" title="Cayley Torgeson"><img alt="Cayley Torgeson" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/446923096/IMG_0072_mini.jpg" width="24" /></a> </span>
515
+
516
+
517
+ <span class="vcard">
518
+ <a href="/meetutkarsh" class="url" hreflang="en" rel="contact" title="Utkarsh Srivastava"><img alt="Utkarsh Srivastava" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/193389882/IMG_1906_mini.JPG" width="24" /></a> </span>
519
+
520
+
521
+ <span class="vcard">
522
+ <a href="/twitter_es" class="url" hreflang="en" rel="contact" title="Twitter Español"><img alt="Twitter Español" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/504883319/twitter_bird_profile_mini.png" width="24" /></a> </span>
523
+
524
+
525
+ <span class="vcard">
526
+ <a href="/rion" class="url" hreflang="en" rel="contact" title="Rion"><img alt="Rion" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/182109900/Photo_8_mini.jpg" width="24" /></a> </span>
527
+
528
+
529
+ <span class="vcard">
530
+ <a href="/nancyjconnery" class="url" hreflang="en" rel="contact" title="Nancy Connery"><img alt="Nancy Connery" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/93080012/Mommy_and_Morgan_mini.jpg" width="24" /></a> </span>
531
+
532
+
533
+ <span class="vcard">
534
+ <a href="/tamtam2" class="url" hreflang="en" rel="contact" title="tamtam2"><img alt="tamtam2" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/458987118/Taimay_Red_Outside_2_half_mini.jpg" width="24" /></a> </span>
535
+
536
+
537
+ <span class="vcard">
538
+ <a href="/em33" class="url" hreflang="en" rel="contact" title="Emee"><img alt="Emee" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/61480913/em_mini.PNG" width="24" /></a> </span>
539
+
540
+
541
+ <span class="vcard">
542
+ <a href="/andr8a" class="url" hreflang="en" rel="contact" title="andr8a"><img alt="andr8a" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/81960525/kitty_mini.jpg" width="24" /></a> </span>
543
+
544
+
545
+ <span class="vcard">
546
+ <a href="/keerthi" class="url" hreflang="en" rel="contact" title="Keerthi Prakash"><img alt="Keerthi Prakash" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/104888409/keerthi_2_2__mini.jpg" width="24" /></a> </span>
547
+
548
+
549
+ <span class="vcard">
550
+ <a href="/Lukester" class="url" hreflang="en" rel="contact" title="Luke "><img alt="Luke " class="photo fn" height="24" src="http://a1.twimg.com/profile_images/543545486/image_mini.jpg" width="24" /></a> </span>
551
+
552
+
553
+ <span class="vcard">
554
+ <a href="/sean" class="url" hreflang="ja" rel="contact" title="Sean"><img alt="Sean" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/468339255/645882267_8CRyH-L_mini.jpg" width="24" /></a> </span>
555
+
556
+
557
+ <span class="vcard">
558
+ <a href="/che" class="url" hreflang="en" rel="contact" title="Cheryl Palarca"><img alt="Cheryl Palarca" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/399472616/Picture_2a_mini.jpg" width="24" /></a> </span>
559
+
560
+
561
+ <span class="vcard">
562
+ <a href="/lg" class="url" hreflang="en" rel="contact" title="Larry Gadea"><img alt="Larry Gadea" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/53283340/q90406211_4464_mini.jpg" width="24" /></a> </span>
563
+
564
+
565
+ <span class="vcard">
566
+ <a href="/tiger" class="url" hreflang="en" rel="contact" title="Emily"><img alt="Emily" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/257293259/724px-Tigerente_mini.jpg" width="24" /></a> </span>
567
+
568
+
569
+ <span class="vcard">
570
+ <a href="/mattknox" class="url" hreflang="en" rel="contact" title="matt knox"><img alt="matt knox" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/257712871/n14602342_30779909_9881_mini.jpg" width="24" /></a> </span>
571
+
572
+
573
+ <span class="vcard">
574
+ <a href="/Magnuson" class="url" hreflang="en" rel="contact" title="Charles Magnuson"><img alt="Charles Magnuson" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/371840833/twitter_pic_mini.jpg" width="24" /></a> </span>
575
+
576
+
577
+ <span class="vcard">
578
+ <a href="/Charles" class="url" hreflang="en" rel="contact" title="Charles"><img alt="Charles" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/529789365/4099673652_248e60ccf5_mini.jpg" width="24" /></a> </span>
579
+
580
+
581
+ <span class="vcard">
582
+ <a href="/rsarver" class="url" hreflang="en" rel="contact" title="Ryan Sarver"><img alt="Ryan Sarver" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/53700173/2448434960_65aba38823_t_mini.jpg" width="24" /></a> </span>
583
+
584
+
585
+ <span class="vcard">
586
+ <a href="/ElizaSwan" class="url" hreflang="en" rel="contact" title="Robin"><img alt="Robin" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/260125397/icon_mini.png" width="24" /></a> </span>
587
+
588
+
589
+ <span class="vcard">
590
+ <a href="/sam" class="url" hreflang="en" rel="contact" title="Sam Luckenbill"><img alt="Sam Luckenbill" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/65334155/601879580_479440f611_mini.jpg" width="24" /></a> </span>
591
+
592
+
593
+ <span class="vcard">
594
+ <a href="/noradio" class="url" hreflang="en" rel="contact" title="Marcel Molina"><img alt="Marcel Molina" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/53473799/marcel-euro-rails-conf_mini.jpg" width="24" /></a> </span>
595
+
596
+
597
+ <span class="vcard">
598
+ <a href="/bakineggs" class="url" hreflang="en" rel="contact" title="Dan Barry"><img alt="Dan Barry" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/59318058/n30408690_33614537_9476_square_mini.jpg" width="24" /></a> </span>
599
+
600
+
601
+ <span class="vcard">
602
+ <a href="/emaland" class="url" hreflang="en" rel="contact" title="emaland"><img alt="emaland" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/68049269/n707915915_121_mini.jpg" width="24" /></a> </span>
603
+
604
+
605
+ <span class="vcard">
606
+ <a href="/ablegrape" class="url" hreflang="en" rel="contact" title="Doug Cook"><img alt="Doug Cook" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/76892852/sticker2_mini.jpg" width="24" /></a> </span>
607
+
608
+
609
+ </div>
610
+ <div id="friends_view_all">
611
+ <a href="/twitter/following" rel="me">View all&hellip;</a>
612
+ </div>
613
+
614
+ </div>
615
+
616
+
617
+ </div>
618
+
619
+
620
+
621
+
622
+ <div id="rssfeed">
623
+ <hr/>
624
+ <a href="/statuses/user_timeline/783214.rss" class="xref rss profile-rss" rel="alternate" type="application/rss+xml">RSS feed of twitter's tweets</a>
625
+ <a href="/favorites/783214.rss" class="xref rss favorites-rss" rel="alternate" type="application/rss+xml">RSS feed of twitter's favorites</a>
626
+ </div>
627
+
628
+
629
+ </div>
630
+ </td>
631
+
632
+ </tr>
633
+ </tbody>
634
+ </table>
635
+
636
+
637
+
638
+ <div id="footer"
639
+ class="round">
640
+ <h3 class="offscreen">Footer</h3>
641
+
642
+
643
+ <ul>
644
+ <li class="first">&copy; 2009 Twitter</li>
645
+ <li><a href="/about#about">About Us</a></li>
646
+ <li><a href="/about#contact">Contact</a></li>
647
+ <li><a href="http://blog.twitter.com">Blog</a></li>
648
+ <li><a href="http://status.twitter.com">Status</a></li>
649
+ <li><a href="/goodies">Goodies</a></li>
650
+ <li><a href="http://apiwiki.twitter.com/">API</a></li>
651
+ <li><a href="http://business.twitter.com/twitter101">Business</a></li>
652
+ <li><a href="http://help.twitter.com">Help</a></li>
653
+ <li><a href="/jobs">Jobs</a></li>
654
+ <li><a href="/tos">Terms</a></li>
655
+ <li><a href="/privacy">Privacy</a></li>
656
+ </ul>
657
+ </div>
658
+
659
+
660
+
661
+ <hr />
662
+
663
+ </div>
664
+
665
+
666
+
667
+ <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.0/jquery.min.js" type="text/javascript"></script>
668
+ <script src="http://a0.twimg.com/a/1259091217/javascripts/twitter.js?1259117012" type="text/javascript"></script>
669
+ <script src="http://a2.twimg.com/a/1259091217/javascripts/lib/jquery.tipsy.min.js?1259117012" type="text/javascript"></script>
670
+ <script type="text/javascript">
671
+ //<![CDATA[
672
+ page.user_screenname = 'twitter';
673
+ page.user_fullname = 'Twitter';
674
+ page.controller_name = 'AccountController';
675
+ page.action_name = 'profile';
676
+ twttr.form_authenticity_token = '7e394453cc9d849cab133ccb3180e8ae4e6a0258';
677
+ // FIXME: Reconcile with the kinds on the Status model.
678
+ twttr.statusKinds = {
679
+ UPDATE: 1,
680
+ SHARE: 2
681
+ };
682
+ twttr.ListPerUserLimit = 20;
683
+
684
+
685
+ //]]>
686
+ </script>
687
+ <script type="text/javascript">
688
+ //<![CDATA[
689
+
690
+ $( function () {
691
+ initializePage();
692
+
693
+ });
694
+
695
+ //]]>
696
+ </script>
697
+
698
+ <!-- BEGIN google analytics -->
699
+
700
+ <script type="text/javascript">
701
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
702
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
703
+ </script>
704
+
705
+ <script type="text/javascript">
706
+
707
+ try {
708
+ var pageTracker = _gat._getTracker("UA-30775-6");
709
+ pageTracker._setDomainName("twitter.com");
710
+ pageTracker._setVar('Not Logged In');
711
+ pageTracker._setVar('lang: en');
712
+ pageTracker._initData();
713
+ pageTracker._trackPageview('/profile/not_logged_in/twitter');
714
+ } catch(err) { }
715
+
716
+ </script>
717
+
718
+ <!-- END google analytics -->
719
+
720
+
721
+
722
+
723
+
724
+ <div id="notifications"></div>
725
+
726
+
727
+
728
+ <!-- ERB -->
729
+ </body>
730
+
731
+ </html>
@@ -0,0 +1,62 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
2
+ # Specs for Scraper.define and Scraper.parse, run against the saved spec/data/twitter.html page.
3
+ describe Scraper do
4
+ before :all do
5
+ @uri = File.expand_path(File.join(File.dirname(__FILE__), '..', 'data', 'twitter.html' )) # absolute path to the local HTML fixture
6
+ @pattern = ".status" # passed as :node to every scraper defined below
7
+ class Entry < Object # target class; presumably what :class => :entry resolves to -- TODO confirm in scraper.rb
8
+ attr_accessor :text, :date
9
+ end
10
+ end
11
+ # Reset the scraper registry so each example defines its scrapers from scratch.
12
+ before :each do
13
+ Scraper.scrapers = {}
14
+ end
15
+
16
+ describe "defining a scraper" do
17
+ it "should create a new scraper using the specified name" do
18
+ Scraper.define(:s, :class => :entry, :source => @uri, :node => @pattern) {}
19
+
20
+ Scraper(:s).scraper_source.should == @uri
21
+ end
22
+ # An attribute can be given either a plain value (text) or a block (date).
23
+ it "should be set with and without block" do
24
+ Scraper.define(:s, :class => :entry, :source => @uri, :node => @pattern) do |s|
25
+ s.text "foo"
26
+ s.date {"bar"}
27
+ end
28
+
29
+ @objects = Scraper.parse(:s)
30
+ @objects.first.text.should == "foo"
31
+ @objects.first.date.should == "bar"
32
+ end
33
+ # Extracts text and date from each matched node; the fixture is expected to yield 20 entries.
34
+ it "should get the objects from twitter" do
35
+ Scraper.define(:twitter, :class => :entry, :source => @uri, :node => @pattern) do |s|
36
+ s.text { |node| node.at(".entry-content").inner_html }
37
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
38
+ end
39
+
40
+ @objects = Scraper.parse(:twitter)
41
+ @objects.size.should == 20
42
+ @objects.first.text.should == "SMS delivery issues on AT&T <a href=\"http://bit.ly/7JFJ6H\" class=\"tweet-url web\" rel=\"nofollow\" target=\"_blank\">http://bit.ly/7JFJ6H</a>"
43
+ @objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
44
+ end
45
+ # NOTE(review): Scraper.scrape_source_with is replaced here and never restored; before :each only resets Scraper.scrapers, so later examples would presumably keep the Nokogiri proc -- confirm.
46
+ it "should use a different html parser" do
47
+ require 'nokogiri'
48
+ Scraper.scrape_source_with = Proc.new { |source| Nokogiri::HTML(source) }
49
+
50
+ Scraper.define(:twitter, :class => :entry, :source => @uri, :node => @pattern) do |s|
51
+ s.text { |node| node.at(".entry-content").inner_html }
52
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
53
+ end
54
+
55
+ @objects = Scraper.parse(:twitter)
56
+ @objects.size.should == 20
57
+ @objects.first.text.should == "SMS delivery issues on AT&amp;T <a href=\"http://bit.ly/7JFJ6H\" class=\"tweet-url web\" rel=\"nofollow\" target=\"_blank\">http://bit.ly/7JFJ6H</a>" # with this parser the ampersand is expected escaped as &amp;
58
+ @objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
59
+ end
60
+
61
+ end
62
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --format progress
2
+ --color
@@ -0,0 +1,7 @@
1
+ $: << File.join(File.dirname(__FILE__), '..', 'lib') # append the sibling lib/ directory to the load path
2
+ $: << File.join(File.dirname(__FILE__)) # append this file's own directory as well
3
+
4
+ require 'rubygems'
5
+ require 'spec'
6
+ require 'spec/autorun'
7
+ require 'object-scraper' # presumably resolved through the lib/ entry added above (lib/object-scraper.rb is listed in the manifest)
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: object-scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Enrico Genauck
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-10 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.8.2
24
+ version:
25
+ description: Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.
26
+ email: kontakt@enricogenauck.de
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.rdoc
33
+ - lib/object-scraper.rb
34
+ - lib/object-scraper/scraper.rb
35
+ files:
36
+ - Manifest
37
+ - README.rdoc
38
+ - Rakefile
39
+ - lib/object-scraper.rb
40
+ - lib/object-scraper/scraper.rb
41
+ - object-scraper.gemspec
42
+ - spec/data/twitter.html
43
+ - spec/object-scraper/scraper_spec.rb
44
+ - spec/spec.opts
45
+ - spec/spec_helper.rb
46
+ has_rdoc: true
47
+ homepage: http://github.com/enricogenauck/object-scraper
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --line-numbers
53
+ - --inline-source
54
+ - --title
55
+ - Object-scraper
56
+ - --main
57
+ - README.rdoc
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "1.2"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project: object-scraper
75
+ rubygems_version: 1.3.5
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: Recipe like object extraction from HTML sources
79
+ test_files: []
80
+