object-scraper 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +10 -0
- data/README.rdoc +53 -0
- data/Rakefile +16 -0
- data/lib/object-scraper/scraper.rb +128 -0
- data/lib/object-scraper.rb +20 -0
- data/object-scraper.gemspec +33 -0
- data/spec/data/twitter.html +731 -0
- data/spec/object-scraper/scraper_spec.rb +62 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +7 -0
- metadata +80 -0
data/Manifest
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
= Object Scraper
|
2
|
+
|
3
|
+
== Description
|
4
|
+
|
5
|
+
Object scraper is a thin wrapper for hpricot to enable recipe-like
|
6
|
+
extraction of ruby objects from various web sites.
|
7
|
+
|
8
|
+
== Install
|
9
|
+
|
10
|
+
=== Gem
|
11
|
+
|
12
|
+
gem install object-scraper --source http://gemcutter.org
|
13
|
+
|
14
|
+
=== Rails
|
15
|
+
|
16
|
+
config.gem 'object-scraper', :source => 'http://gemcutter.org'
|
17
|
+
|
18
|
+
== Example
|
19
|
+
|
20
|
+
class Entry < Object
|
21
|
+
attr_accessor :text, :date
|
22
|
+
end
|
23
|
+
|
24
|
+
uri = "http://twitter.com/twitter"
|
25
|
+
pattern = ".status"
|
26
|
+
|
27
|
+
Scraper.define(:twitter, :class => :entry, :source => uri, :node => pattern) do |s|
|
28
|
+
s.text { |node| node.at(".entry-content").inner_html }
|
29
|
+
s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
|
30
|
+
end
|
31
|
+
|
32
|
+
@objects = Scraper.parse(:twitter)
|
33
|
+
|
34
|
+
== Advanced Example
|
35
|
+
|
36
|
+
It is possible to use other existing HTML parsers instead of hpricot.
|
37
|
+
Just override the corresponding proc object.
|
38
|
+
|
39
|
+
require 'nokogiri'
|
40
|
+
Scraper.scrape_source_with = Proc.new { |source| Nokogiri::HTML(source) }
|
41
|
+
|
42
|
+
Scraper.define(:twitter, :class => :entry, :source => uri, :node => pattern) do |s|
|
43
|
+
# initialize your objects here accordingly
|
44
|
+
end
|
45
|
+
|
46
|
+
== Rails
|
47
|
+
|
48
|
+
All scraper definitions sitting in RAILS_ROOT/scrapers will be taken into account
|
49
|
+
automatically when you use object-scraper as a gem in your rails project.
|
50
|
+
|
51
|
+
== Author
|
52
|
+
|
53
|
+
- Maintained by {Enrico Genauck}[mailto:kontakt@enricogenauck.de]
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
Echoe.new('object-scraper', '0.0.2') do |p|
|
6
|
+
p.summary = "Recipe like object extraction from HTML sources"
|
7
|
+
p.description = "Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites."
|
8
|
+
p.url = "http://github.com/enricogenauck/object-scraper"
|
9
|
+
p.author = "Enrico Genauck"
|
10
|
+
p.email = "kontakt@enricogenauck.de"
|
11
|
+
p.ignore_pattern = ["tmp/*", "script/*"]
|
12
|
+
p.development_dependencies = []
|
13
|
+
p.runtime_dependencies = ["hpricot >=0.8.2"]
|
14
|
+
end
|
15
|
+
|
16
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
@@ -0,0 +1,128 @@
|
|
1
|
+
# Declarative scraper that turns repeated nodes of a parsed document into
# plain Ruby objects.
#
# A scraper is registered under a name with Scraper.define and executed with
# Scraper.parse. For every node matching the configured pattern a new instance
# of the configured class is created and the definition block is called with
# the scraper itself; any unknown method called on the scraper inside that
# block is turned into an attribute assignment on the object being built.
class Scraper

  # Raised when a scraper is defined with the same name as a previously-defined scraper.
  class DuplicateDefinitionError < RuntimeError
  end

  # Matches sources that carry a URI scheme ("http://...", "ftp://...").
  REMOTE_SOURCE = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

  class << self
    # Registry of defined scrapers, keyed by name as a Symbol.
    attr_accessor :scrapers
    # Callable that receives the opened source and returns a document
    # responding to +search+. Defaults to Hpricot.
    attr_accessor :scrape_source_with
    # Paths inspected by find_definitions.
    attr_accessor :definition_file_paths
  end

  self.scrapers = {}
  self.scrape_source_with = Proc.new { |source| Hpricot(source) }
  self.definition_file_paths = %w(scrapers)

  attr_reader :scraper_source, :scraper_node

  # Registers a new scraper.
  #
  # name::    identifier of the scraper (String or Symbol); stored as a Symbol
  #           so that it can be found again by get and parse.
  # options:: requires :class, :source and :node; no other keys are accepted.
  # block::   called once per matching node with the scraper as its argument.
  #
  # Returns the new Scraper instance. Raises ArgumentError for invalid options
  # and DuplicateDefinitionError when the name is already taken.
  def self.define(name, options = {}, &block)
    key = name.to_sym
    instance = Scraper.new(key, options, &block)

    if self.scrapers[key]
      raise DuplicateDefinitionError, "Scraper already defined: #{name}"
    end

    self.scrapers[key] = instance
  end

  def initialize(name, options = {}, &block) #:nodoc:
    assert_valid_options(options)
    @objects = []
    @class = class_for(options[:class])
    @scraper_source = options[:source]
    @scraper_node = options[:node]
    @block = block
  end

  # Returns the scraper registered under +name+ or raises ArgumentError.
  def self.get(name)
    scraper_by_name(name)
  end

  # Runs the scraper registered under +name+ and returns the extracted objects.
  def self.parse(name)
    scraper_by_name(name).parse
  end

  # Opens the source, parses it with Scraper.scrape_source_with and builds one
  # object per node matching the node pattern.
  #
  # Every call starts from an empty result list, so running the same scraper
  # twice does not return the objects of the previous run again.
  def parse
    @objects = []
    doc = open_source { |f| Scraper.scrape_source_with.call(f) }
    doc.search(@scraper_node).each do |n|
      @current_node = n
      @current_object = @class.new
      @objects << @current_object
      # A scraper may be defined without a block; the objects then stay blank.
      @block.call(self) if @block
    end
    @objects
  end

  # Looks a scraper up by name (String or Symbol); raises ArgumentError when
  # no scraper with that name has been defined.
  def self.scraper_by_name(name)
    scrapers[name.to_sym] || raise(ArgumentError, "No such scraper: #{name.to_s}")
  end

  # Turns <tt>s.attribute { |node| ... }</tt> and <tt>s.attribute value</tt>
  # into <tt>current_object.attribute = ...</tt> while a parse run is active.
  # Outside of a parse run there is no object to assign to, so the call is
  # handed to the default implementation (NoMethodError).
  def method_missing(symbol, *args, &block)
    return super if @current_object.nil?

    if block_given?
      @current_object.send("#{symbol}=", yield(@current_node))
    else
      @current_object.send("#{symbol}=", args.first)
    end
  end

  # Keeps respond_to? in line with method_missing: a name is answered when the
  # object currently being built has a matching writer.
  def respond_to_missing?(symbol, include_private = false)
    (!@current_object.nil? && @current_object.respond_to?("#{symbol}=")) || super
  end

  # Loads scraper definitions from every entry of definition_file_paths: the
  # file "<path>.rb" if it exists and all "*.rb" files inside the directory
  # "<path>" if it is one.
  #
  # Paths are expanded before they are required because relative paths are not
  # resolved against the working directory on Ruby >= 1.9.
  def self.find_definitions
    definition_file_paths.each do |path|
      single_file = "#{path}.rb"
      require File.expand_path(single_file) if File.exist?(single_file)

      if File.directory? path
        Dir[File.join(path, '*.rb')].sort.each do |file|
          require File.expand_path(file)
        end
      end
    end
  end

  private

  # Opens the configured source and yields the handle to the block.
  # Sources with a URI scheme go through URI.open when it is available
  # (Kernel#open no longer opens URLs on Ruby >= 3.0); everything else is
  # opened with Kernel#open as before.
  def open_source(&block)
    source = @scraper_source
    if defined?(URI) && URI.respond_to?(:open) && source.to_s =~ REMOTE_SOURCE
      URI.open(source, &block)
    else
      open(source, &block)
    end
  end

  # Resolves a String/Symbol such as :entry or "admin/entry" to the matching
  # constant; anything that has no to_sym (e.g. a Class) is returned untouched.
  def class_for(class_or_to_s)
    if class_or_to_s.respond_to?(:to_sym)
      Object.const_get(variable_name_to_class_name(class_or_to_s))
    else
      class_or_to_s
    end
  end

  # Returns the scraper name as a Symbol for a String/Symbol or a class.
  def scraper_name_for(class_or_to_s)
    if class_or_to_s.respond_to?(:to_sym)
      class_or_to_s.to_sym
    else
      class_name_to_variable_name(class_or_to_s).to_sym
    end
  end

  # "Admin::BlogEntry" => "admin/blog_entry"
  def class_name_to_variable_name(name)
    name.to_s.gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
      gsub(/([a-z\d])([A-Z])/,'\1_\2').
      tr("-", "_").
      downcase
  end

  # "admin/blog_entry" => "Admin::BlogEntry"
  def variable_name_to_class_name(name)
    name.to_s.
      gsub(/\/(.?)/) { "::#{$1.upcase}" }.
      gsub(/(?:^|_)(.)/) { $1.upcase }
  end

  # Raises ArgumentError unless +options+ consists of exactly the supported
  # keys and each of :class, :source and :node has a truthy value.
  def assert_valid_options(options)
    invalid_keys = options.keys - [:class, :source, :node]
    unless invalid_keys.empty?
      raise ArgumentError, "Unknown arguments: #{invalid_keys.inspect}"
    end

    [:class, :source, :node].each do |key|
      raise ArgumentError, "Missing argument: #{key.inspect}" unless options[key]
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'object-scraper/scraper'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'hpricot'
|
4
|
+
|
5
|
+
# Shortcut for Scraper.get
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
# Scraper(:my_space)
|
9
|
+
def Scraper(name)
|
10
|
+
Scraper.get(name)
|
11
|
+
end
|
12
|
+
|
13
|
+
if defined? Rails.configuration
|
14
|
+
Rails.configuration.after_initialize do
|
15
|
+
Scraper.definition_file_paths = [File.join(RAILS_ROOT, 'scrapers')]
|
16
|
+
Scraper.find_definitions
|
17
|
+
end
|
18
|
+
else
|
19
|
+
Scraper.find_definitions
|
20
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{object-scraper}
|
5
|
+
s.version = "0.0.2"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Enrico Genauck"]
|
9
|
+
s.date = %q{2009-12-10}
|
10
|
+
s.description = %q{Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.}
|
11
|
+
s.email = %q{kontakt@enricogenauck.de}
|
12
|
+
s.extra_rdoc_files = ["README.rdoc", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb"]
|
13
|
+
s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb", "object-scraper.gemspec", "spec/data/twitter.html", "spec/object-scraper/scraper_spec.rb", "spec/spec.opts", "spec/spec_helper.rb"]
|
14
|
+
s.homepage = %q{http://github.com/enricogenauck/object-scraper}
|
15
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Object-scraper", "--main", "README.rdoc"]
|
16
|
+
s.require_paths = ["lib"]
|
17
|
+
s.rubyforge_project = %q{object-scraper}
|
18
|
+
s.rubygems_version = %q{1.3.5}
|
19
|
+
s.summary = %q{Recipe like object extraction from HTML sources}
|
20
|
+
|
21
|
+
if s.respond_to? :specification_version then
|
22
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
23
|
+
s.specification_version = 3
|
24
|
+
|
25
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
26
|
+
s.add_runtime_dependency(%q<hpricot>, [">= 0.8.2"])
|
27
|
+
else
|
28
|
+
s.add_dependency(%q<hpricot>, [">= 0.8.2"])
|
29
|
+
end
|
30
|
+
else
|
31
|
+
s.add_dependency(%q<hpricot>, [">= 0.8.2"])
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,731 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
2
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
3
|
+
<head>
|
4
|
+
<script type="text/javascript">
|
5
|
+
//<![CDATA[
|
6
|
+
var page={};var onCondition=function(D,C,A,B){D=D;A=A?Math.min(A,5):5;B=B||100;if(D()){C()}else{if(A>1){setTimeout(function(){onCondition(D,C,A-1,B)},B)}}};
|
7
|
+
//]]>
|
8
|
+
</script>
|
9
|
+
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
|
10
|
+
<meta content="en-us" http-equiv="Content-Language" />
|
11
|
+
<meta content="Always wondering what's happening. " name="description" />
|
12
|
+
<meta content="no" http-equiv="imagetoolbar" />
|
13
|
+
<meta content="width = 780" name="viewport" />
|
14
|
+
<meta content="4FTTxY4uvo0RZTMQqIyhh18HsepyJOctQ+XTOu1zsfE=" name="verify-v1" />
|
15
|
+
<meta content="1" name="page" />
|
16
|
+
<meta content="NOODP" name="robots" />
|
17
|
+
<meta content="n" name="session-loggedin" />
|
18
|
+
<meta content="twitter" name="page-user-screen_name" />
|
19
|
+
<title id="page_title">Twitter (twitter) on Twitter</title>
|
20
|
+
<link href="http://a1.twimg.com/a/1259091217/images/favicon.ico" rel="shortcut icon" type="image/x-icon" />
|
21
|
+
<link href="http://a1.twimg.com/a/1259091217/images/twitter_57.png" rel="apple-touch-icon" />
|
22
|
+
<link rel="alternate" href="http://twitter.com/statuses/user_timeline/783214.rss" title="twitter's Tweets" type="application/rss+xml" />
|
23
|
+
<link rel="alternate" href="http://twitter.com/favorites/783214.rss" title="twitter's Favorites" type="application/rss+xml" />
|
24
|
+
|
25
|
+
|
26
|
+
<link href="http://a1.twimg.com/a/1259091217/stylesheets/twitter.css?1259117012" media="screen" rel="stylesheet" type="text/css" />
|
27
|
+
<style type="text/css">
|
28
|
+
|
29
|
+
body { background: #C0DEED url('http://a3.twimg.com/a/1259091217/images/bg-clouds.png') repeat-x; }
|
30
|
+
|
31
|
+
|
32
|
+
body {
|
33
|
+
background-position: 0 0;
|
34
|
+
padding-top: 0;
|
35
|
+
}
|
36
|
+
|
37
|
+
</style>
|
38
|
+
<link href="http://a0.twimg.com/a/1259091217/stylesheets/following.css?1259117012" media="screen, projection" rel="stylesheet" type="text/css" />
|
39
|
+
|
40
|
+
</head>
|
41
|
+
|
42
|
+
<body class="account safari" id="profile">
|
43
|
+
<script type="text/javascript">
|
44
|
+
//<![CDATA[
|
45
|
+
if (window.top !== window.self) {document.write = "";window.top.location = window.self.location; setTimeout(function(){document.body.innerHTML='';},1);window.self.onload=function(evt){document.body.innerHTML='';};}
|
46
|
+
//]]>
|
47
|
+
</script>
|
48
|
+
|
49
|
+
|
50
|
+
<div id="dim-screen"></div>
|
51
|
+
<ul id="accessibility" class="offscreen">
|
52
|
+
<li><a href="#content" accesskey="0">Skip past navigation</a></li>
|
53
|
+
<li>On a mobile phone? Check out <a href="http://m.twitter.com/">m.twitter.com</a>!</li>
|
54
|
+
<li><a href="#footer" accesskey="2">Skip to navigation</a></li>
|
55
|
+
<li><a href="#signin">Skip to sign in form</a></li>
|
56
|
+
</ul>
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
<div id="container" class="subpage">
|
61
|
+
<span id="loader" style="display:none"><img alt="Loader" src="http://a0.twimg.com/a/1259091217/images/loader.gif" /></span>
|
62
|
+
<div id="header">
|
63
|
+
<a href="http://twitter.com/" title="Twitter / Home" accesskey="1" id="logo">
|
64
|
+
<img alt="Twitter.com" height="36" src="http://a0.twimg.com/a/1259091217/images/twitter_logo_header.png" width="155" />
|
65
|
+
</a>
|
66
|
+
<form method="post" id="sign_out_form" action="/sessions/destroy" style="display:none;">
|
67
|
+
<input name="authenticity_token" value="7e394453cc9d849cab133ccb3180e8ae4e6a0258" type="hidden" />
|
68
|
+
</form>
|
69
|
+
|
70
|
+
<ul class="top-navigation round">
|
71
|
+
<li><a href="/login" accesskey="l">Login</a></li>
|
72
|
+
<li class="signup-link"><a href="/signup">Join Twitter!</a></li>
|
73
|
+
</ul>
|
74
|
+
</div>
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
<div id="profilebox_outer">
|
79
|
+
<div id="profilebird"><img alt="Profile_bird" height="48" id="profilebirdimg" src="http://a0.twimg.com/a/1259091217/images/profile_bird.png" width="48" /></div>
|
80
|
+
<div id="profilebox" class="clearfix">
|
81
|
+
<div id="profiletext">
|
82
|
+
<h1>Hey there! <strong>twitter</strong> is using Twitter.</h1>
|
83
|
+
<h2>Twitter is a free service that lets you keep in touch with people through the exchange of quick, frequent answers to one simple question: What's happening? <strong>Join today</strong> to start receiving <strong>twitter's</strong> tweets.</h2>
|
84
|
+
</div>
|
85
|
+
<div id="profilebutton">
|
86
|
+
<form name="account_signup_form" id="account_signup_form" action="/signup">
|
87
|
+
<input id="follow" name="follow" type="hidden" value="twitter" />
|
88
|
+
<input class="profilesubmit" id="join" name="commit" type="submit" value="Join today!" />
|
89
|
+
</form>
|
90
|
+
<p><small>Already using Twitter<br /> from your phone? <a href="/account/complete">Click here</a>.</small></p>
|
91
|
+
</div>
|
92
|
+
</div>
|
93
|
+
</div>
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
<div class="content-bubble-arrow"></div>
|
100
|
+
|
101
|
+
|
102
|
+
<table cellspacing="0" class="columns">
|
103
|
+
<tbody>
|
104
|
+
<tr>
|
105
|
+
<td id="content" class="round-left column">
|
106
|
+
<div class="wrapper">
|
107
|
+
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
<div class="profile-user">
|
117
|
+
<div id="user_783214" class="user ">
|
118
|
+
<h2 class="thumb clearfix">
|
119
|
+
<a href="/account/profile_image/twitter?hreflang=en"><img alt="" border="0" height="73" id="profile-image" src="http://a1.twimg.com/profile_images/75075164/twitter_bird_profile_bigger.png" valign="middle" width="73" /></a>
|
120
|
+
<div class="screen-name">twitter</div>
|
121
|
+
</h2>
|
122
|
+
</div>
|
123
|
+
</div>
|
124
|
+
|
125
|
+
|
126
|
+
<div class="section">
|
127
|
+
|
128
|
+
<div id="timeline_heading" style="display: none;">
|
129
|
+
<h1 id="heading"></h1>
|
130
|
+
</div>
|
131
|
+
<ol id='timeline' class='statuses'>
|
132
|
+
<li class="hentry u-twitter status latest-status" id="status_6191506635">
|
133
|
+
<span class="status-body">
|
134
|
+
<span class="entry-content">SMS delivery issues on AT&T <a href="http://bit.ly/7JFJ6H" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/7JFJ6H</a></span>
|
135
|
+
<span class="meta entry-meta">
|
136
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/6191506635">
|
137
|
+
<span class="published timestamp" data="{time:'Mon Nov 30 04:10:51 +0000 2009'}">about 8 hours ago</span>
|
138
|
+
</a>
|
139
|
+
<span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
|
140
|
+
|
141
|
+
</span>
|
142
|
+
</span>
|
143
|
+
</li>
|
144
|
+
<li class="hentry u-twitter status" id="status_5989297065">
|
145
|
+
<span class="status-body">
|
146
|
+
<span class="entry-content">Fixing elevated error rate on twitter.com <a href="http://bit.ly/4xRf8U" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4xRf8U</a></span>
|
147
|
+
<span class="meta entry-meta">
|
148
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5989297065">
|
149
|
+
<span class="published timestamp" data="{time:'Mon Nov 23 22:13:27 +0000 2009'}">2:13 PM Nov 23rd</span>
|
150
|
+
</a>
|
151
|
+
<span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
|
152
|
+
|
153
|
+
</span>
|
154
|
+
</span>
|
155
|
+
</li>
|
156
|
+
<li class="hentry u-twitter status" id="status_5875860574">
|
157
|
+
<span class="status-body">
|
158
|
+
<span class="entry-content">Abonnez-vous à @<a class="tweet-url username" href="/Twitter_FR">Twitter_FR</a>, le compte officiel de Twitter en français!</span>
|
159
|
+
<span class="meta entry-meta">
|
160
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5875860574">
|
161
|
+
<span class="published timestamp" data="{time:'Fri Nov 20 01:34:31 +0000 2009'}">5:34 PM Nov 19th</span>
|
162
|
+
</a>
|
163
|
+
<span>from web</span>
|
164
|
+
|
165
|
+
</span>
|
166
|
+
</span>
|
167
|
+
</li>
|
168
|
+
<li class="hentry u-twitter status" id="status_5875627626">
|
169
|
+
<span class="status-body">
|
170
|
+
<span class="entry-content">Nouvelle saveur : Twitter en Français! <a href="http://bit.ly/DeCHQ" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/DeCHQ</a></span>
|
171
|
+
<span class="meta entry-meta">
|
172
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5875627626">
|
173
|
+
<span class="published timestamp" data="{time:'Fri Nov 20 01:25:37 +0000 2009'}">5:25 PM Nov 19th</span>
|
174
|
+
</a>
|
175
|
+
<span>from web</span>
|
176
|
+
|
177
|
+
</span>
|
178
|
+
</span>
|
179
|
+
</li>
|
180
|
+
<li class="hentry u-twitter status" id="status_5874789939">
|
181
|
+
<span class="status-body">
|
182
|
+
<span class="entry-content">RT @<a class="tweet-url username" href="/macgill">macgill</a>: Released refreshed Twitter privacy policy <a href="http://bit.ly/kYyQ6" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/kYyQ6</a> and a new helpful trademark page <a href="http://bit.ly/2iGZgV" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/2iGZgV</a> Check </span>
|
183
|
+
<a href="http://twitter.com/twitter/status/5874789939">...</a> <span class="meta entry-meta">
|
184
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5874789939">
|
185
|
+
<span class="published timestamp" data="{time:'Fri Nov 20 00:53:33 +0000 2009'}">4:53 PM Nov 19th</span>
|
186
|
+
</a>
|
187
|
+
<span>from web</span>
|
188
|
+
|
189
|
+
</span>
|
190
|
+
</span>
|
191
|
+
</li>
|
192
|
+
<li class="hentry u-twitter status" id="status_5870049749">
|
193
|
+
<span class="status-body">
|
194
|
+
<span class="entry-content">Think globally, Tweet locally with geotagging. Opt-in! <a href="http://blog.twitter.com/2009/11/think-globally-tweet-locally.html" class="tweet-url web" rel="nofollow" target="_blank">http://blog.twitter.com/200...</a></span>
|
195
|
+
<span class="meta entry-meta">
|
196
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5870049749">
|
197
|
+
<span class="published timestamp" data="{time:'Thu Nov 19 21:54:41 +0000 2009'}">1:54 PM Nov 19th</span>
|
198
|
+
</a>
|
199
|
+
<span>from web</span>
|
200
|
+
|
201
|
+
</span>
|
202
|
+
</span>
|
203
|
+
</li>
|
204
|
+
<li class="hentry u-twitter status" id="status_5867297191">
|
205
|
+
<span class="status-body">
|
206
|
+
<span class="entry-content">RT @<a class="tweet-url username" href="/davewiner">davewiner</a>: The new Retweet is cool! (Scripting News) <a href="http://r2.ly/wvz4" class="tweet-url web" rel="nofollow" target="_blank">http://r2.ly/wvz4</a></span>
|
207
|
+
<span class="meta entry-meta">
|
208
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5867297191">
|
209
|
+
<span class="published timestamp" data="{time:'Thu Nov 19 20:09:04 +0000 2009'}">12:09 PM Nov 19th</span>
|
210
|
+
</a>
|
211
|
+
<span>from web</span>
|
212
|
+
|
213
|
+
</span>
|
214
|
+
</span>
|
215
|
+
</li>
|
216
|
+
<li class="hentry u-twitter status" id="status_5866757295">
|
217
|
+
<span class="status-body">
|
218
|
+
<span class="entry-content">What did the general say when he found out his army was going to lose the war? Retweet! (feature now available to all users)</span>
|
219
|
+
<span class="meta entry-meta">
|
220
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5866757295">
|
221
|
+
<span class="published timestamp" data="{time:'Thu Nov 19 19:48:01 +0000 2009'}">11:48 AM Nov 19th</span>
|
222
|
+
</a>
|
223
|
+
<span>from web</span>
|
224
|
+
|
225
|
+
</span>
|
226
|
+
</span>
|
227
|
+
</li>
|
228
|
+
<li class="hentry u-twitter status" id="status_5865461320">
|
229
|
+
<span class="status-body">
|
230
|
+
<span class="entry-content">Quick question: What's happening? <a href="http://blog.twitter.com/2009/11/whats-happening.html" class="tweet-url web" rel="nofollow" target="_blank">http://blog.twitter.com/200...</a></span>
|
231
|
+
<span class="meta entry-meta">
|
232
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5865461320">
|
233
|
+
<span class="published timestamp" data="{time:'Thu Nov 19 18:57:36 +0000 2009'}">10:57 AM Nov 19th</span>
|
234
|
+
</a>
|
235
|
+
<span>from web</span>
|
236
|
+
|
237
|
+
</span>
|
238
|
+
</span>
|
239
|
+
</li>
|
240
|
+
<li class="hentry u-twitter status" id="status_5838277342">
|
241
|
+
<span class="status-body">
|
242
|
+
<span class="entry-content">A picture is worth more than 140 characters. Shout out to @<a class="tweet-url username" href="/flickr">flickr</a> for their handy Flickr2Twitter app! <a href="/search?q=%23appwednesday" title="#appwednesday" class="tweet-url hashtag">#appwednesday</a></span>
|
243
|
+
<span class="meta entry-meta">
|
244
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5838277342">
|
245
|
+
<span class="published timestamp" data="{time:'Wed Nov 18 21:47:50 +0000 2009'}">1:47 PM Nov 18th</span>
|
246
|
+
</a>
|
247
|
+
<span>from web</span>
|
248
|
+
|
249
|
+
</span>
|
250
|
+
</span>
|
251
|
+
</li>
|
252
|
+
<li class="hentry u-twitter status" id="status_5812855148">
|
253
|
+
<span class="status-body">
|
254
|
+
<span class="entry-content">Wrong profile background pictures <a href="http://bit.ly/6lMhG" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/6lMhG</a></span>
|
255
|
+
<span class="meta entry-meta">
|
256
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5812855148">
|
257
|
+
<span class="published timestamp" data="{time:'Wed Nov 18 01:11:32 +0000 2009'}">5:11 PM Nov 17th</span>
|
258
|
+
</a>
|
259
|
+
<span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
|
260
|
+
|
261
|
+
</span>
|
262
|
+
</span>
|
263
|
+
</li>
|
264
|
+
<li class="hentry u-twitter status" id="status_5783517157">
|
265
|
+
<span class="status-body">
|
266
|
+
<span class="entry-content">Another first from the UK - MMS your pics to Twitter! <a href="http://bit.ly/5cm7R" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/5cm7R</a></span>
|
267
|
+
<span class="meta entry-meta">
|
268
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5783517157">
|
269
|
+
<span class="published timestamp" data="{time:'Tue Nov 17 02:01:27 +0000 2009'}">6:01 PM Nov 16th</span>
|
270
|
+
</a>
|
271
|
+
<span>from web</span>
|
272
|
+
|
273
|
+
</span>
|
274
|
+
</span>
|
275
|
+
</li>
|
276
|
+
<li class="hentry u-twitter status" id="status_5782562902">
|
277
|
+
<span class="status-body">
|
278
|
+
<span class="entry-content">Breaking in the new office with an awesome performance by @<a class="tweet-url username" href="/1republic">1republic</a>. Streaming live at <a href="http://bit.ly/4C8Hvv" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4C8Hvv</a>.</span>
|
279
|
+
<span class="meta entry-meta">
|
280
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5782562902">
|
281
|
+
<span class="published timestamp" data="{time:'Tue Nov 17 01:25:13 +0000 2009'}">5:25 PM Nov 16th</span>
|
282
|
+
</a>
|
283
|
+
<span>from web</span>
|
284
|
+
|
285
|
+
</span>
|
286
|
+
</span>
|
287
|
+
</li>
|
288
|
+
<li class="hentry u-twitter status" id="status_5781410369">
|
289
|
+
<span class="status-body">
|
290
|
+
<span class="entry-content">Maintenance window Tuesday, November 17th at 11p Pacific <a href="http://bit.ly/4AqkLs" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/4AqkLs</a></span>
|
291
|
+
<span class="meta entry-meta">
|
292
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5781410369">
|
293
|
+
<span class="published timestamp" data="{time:'Tue Nov 17 00:41:43 +0000 2009'}">4:41 PM Nov 16th</span>
|
294
|
+
</a>
|
295
|
+
<span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
|
296
|
+
|
297
|
+
</span>
|
298
|
+
</span>
|
299
|
+
</li>
|
300
|
+
<li class="hentry u-twitter status" id="status_5778942639">
|
301
|
+
<span class="status-body">
|
302
|
+
<span class="entry-content">Official pics from the shiny new Twitter HQ! <a href="http://bit.ly/2oVk85" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/2oVk85</a></span>
|
303
|
+
<span class="meta entry-meta">
|
304
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5778942639">
|
305
|
+
<span class="published timestamp" data="{time:'Mon Nov 16 23:08:56 +0000 2009'}">3:08 PM Nov 16th</span>
|
306
|
+
</a>
|
307
|
+
<span>from web</span>
|
308
|
+
|
309
|
+
</span>
|
310
|
+
</span>
|
311
|
+
</li>
|
312
|
+
<li class="hentry u-twitter status" id="status_5772564273">
|
313
|
+
<span class="status-body">
|
314
|
+
<span class="entry-content">Feels like the 1st day of school at the new office! Here's one last tribute to the old HQ: <a href="http://bit.ly/1BkDvi" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/1BkDvi</a></span>
|
315
|
+
<span class="meta entry-meta">
|
316
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5772564273">
|
317
|
+
<span class="published timestamp" data="{time:'Mon Nov 16 18:56:52 +0000 2009'}">10:56 AM Nov 16th</span>
|
318
|
+
</a>
|
319
|
+
<span>from web</span>
|
320
|
+
|
321
|
+
</span>
|
322
|
+
</span>
|
323
|
+
</li>
|
324
|
+
<li class="hentry u-twitter status" id="status_5742847664">
|
325
|
+
<span class="status-body">
|
326
|
+
<span class="entry-content">Tweeps from 21 states and 5 countries gathered with @<a class="tweet-url username" href="/nasa">nasa</a> to tweet first-hand about the space shuttle launch tomorrow. <a href="http://bit.ly/nyOrn" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/nyOrn</a></span>
|
327
|
+
<span class="meta entry-meta">
|
328
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5742847664">
|
329
|
+
<span class="published timestamp" data="{time:'Sun Nov 15 19:05:27 +0000 2009'}">11:05 AM Nov 15th</span>
|
330
|
+
</a>
|
331
|
+
<span>from web</span>
|
332
|
+
|
333
|
+
</span>
|
334
|
+
</span>
|
335
|
+
</li>
|
336
|
+
<li class="hentry u-twitter status" id="status_5687484356">
|
337
|
+
<span class="status-body">
|
338
|
+
<span class="entry-content">Missing "in reply to" links <a href="http://bit.ly/3HIBNe" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/3HIBNe</a></span>
|
339
|
+
<span class="meta entry-meta">
|
340
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5687484356">
|
341
|
+
<span class="published timestamp" data="{time:'Fri Nov 13 18:52:05 +0000 2009'}">10:52 AM Nov 13th</span>
|
342
|
+
</a>
|
343
|
+
<span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
|
344
|
+
|
345
|
+
</span>
|
346
|
+
</span>
|
347
|
+
</li>
|
348
|
+
<li class="hentry u-twitter status" id="status_5686368225">
|
349
|
+
<span class="status-body">
|
350
|
+
<span class="entry-content">Lots to look forward to next week with the big move to our new headquarters. @<a class="tweet-url username" href="/sara">sara</a> has outdone herself on the interior design! <a href="/search?q=%23twitterhq" title="#twitterhq" class="tweet-url hashtag">#twitterhq</a></span>
|
351
|
+
<span class="meta entry-meta">
|
352
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5686368225">
|
353
|
+
<span class="published timestamp" data="{time:'Fri Nov 13 18:09:17 +0000 2009'}">10:09 AM Nov 13th</span>
|
354
|
+
</a>
|
355
|
+
<span>from web</span>
|
356
|
+
|
357
|
+
</span>
|
358
|
+
</span>
|
359
|
+
</li>
|
360
|
+
<li class="hentry u-twitter status" id="status_5632730783">
|
361
|
+
<span class="status-body">
|
362
|
+
<span class="entry-content">Retweet feature temporarily disabled <a href="http://bit.ly/TU4h3" class="tweet-url web" rel="nofollow" target="_blank">http://bit.ly/TU4h3</a></span>
|
363
|
+
<span class="meta entry-meta">
|
364
|
+
<a class="entry-date" rel="bookmark" href="http://twitter.com/twitter/status/5632730783">
|
365
|
+
<span class="published timestamp" data="{time:'Wed Nov 11 23:18:48 +0000 2009'}">3:18 PM Nov 11th</span>
|
366
|
+
</a>
|
367
|
+
<span>from <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a></span>
|
368
|
+
|
369
|
+
</span>
|
370
|
+
</span>
|
371
|
+
</li>
|
372
|
+
</ol> <div id="pagination">
|
373
|
+
<a href="/twitter?max_id=6191506635&page=2&twttr=true" class="round more" id="more" rel="next">more</a> </div>
|
374
|
+
|
375
|
+
</div>
|
376
|
+
|
377
|
+
|
378
|
+
|
379
|
+
</div>
|
380
|
+
</td>
|
381
|
+
|
382
|
+
<td id="side_base" class="column round-right">
|
383
|
+
|
384
|
+
<div id="side">
|
385
|
+
|
386
|
+
<div id="profile" class="section profile-side">
|
387
|
+
<span class="section-links">
|
388
|
+
</span>
|
389
|
+
<address>
|
390
|
+
<ul class="about vcard entry-author">
|
391
|
+
|
392
|
+
|
393
|
+
|
394
|
+
<li><span class="label">Name</span> <span class="fn">Twitter</span></li>
|
395
|
+
<li><span class="label">Location</span> <span class="adr">San Francisco, CA</span></li>
|
396
|
+
<li><span class="label">Web</span> <a href="http://twitter.com" class="url" rel="me nofollow" target="_blank">http://twitter.com</a></li>
|
397
|
+
<li id="bio"><span class="label">Bio</span> <span class="bio">Always wondering what's happening. </span></li>
|
398
|
+
|
399
|
+
</ul>
|
400
|
+
</address>
|
401
|
+
|
402
|
+
|
403
|
+
|
404
|
+
<div class="stats">
|
405
|
+
<table>
|
406
|
+
<tr>
|
407
|
+
<td>
|
408
|
+
|
409
|
+
<a href="/twitter/following" id="following_count_link" class="link-following_page" rel="me" title="See who twitter is following">
|
410
|
+
<span id="following_count" class="stats_count numeric">123 </span>
|
411
|
+
<span class="label">Following</span>
|
412
|
+
</a>
|
413
|
+
|
414
|
+
</td>
|
415
|
+
<td>
|
416
|
+
|
417
|
+
<a href="/twitter/followers" id="follower_count_link" class="link-followers_page" rel="me" title="See who's following twitter">
|
418
|
+
<span id="follower_count" class="stats_count numeric">2,700,999 </span>
|
419
|
+
<span class="label">Followers</span>
|
420
|
+
</a>
|
421
|
+
|
422
|
+
</td>
|
423
|
+
<td>
|
424
|
+
|
425
|
+
<a href="/twitter/lists/memberships" id="lists_count_link" class="link-lists_page" rel="me" title="See which lists twitter is on">
|
426
|
+
<span id="lists_count" class="stats_count numeric">12,392 </span>
|
427
|
+
<span class="label">Listed</span>
|
428
|
+
</a>
|
429
|
+
|
430
|
+
</td>
|
431
|
+
</tr>
|
432
|
+
</table>
|
433
|
+
</div>
|
434
|
+
|
435
|
+
</div>
|
436
|
+
|
437
|
+
<ul id="primary_nav" class="sidebar-menu">
|
438
|
+
<li id="profile_tab"><a href="/twitter" accesskey="u"><span id="update_count" class="stat_count">592</span><span>Tweets</span></a></li>
|
439
|
+
<li id="profile_favorites_tab"><a href="http://twitter.com/twitter/favorites" accesskey="f"><span>Favorites</span></a></li>
|
440
|
+
</ul>
|
441
|
+
|
442
|
+
|
443
|
+
<hr/>
|
444
|
+
<div id="side_lists">
|
445
|
+
<h2 class="sidebar-title"><span>Lists</span></h2>
|
446
|
+
|
447
|
+
<ul class="sidebar-menu lists-links">
|
448
|
+
<li><a href="/twitter/team" class="list_574" data="{"dispatch_action":"list","mode":"public","description":"","uri":"/twitter/team","subscriber_count":60480,"slug":"team","full_name":"@twitter/team","user":"twitter","name":"Team","id":574,"member_count":124}" title="@twitter/Team"><span>@twitter/<wbr/><b>team</b></span></a></li>
|
449
|
+
</ul>
|
450
|
+
<p class="sidebar-menu sidebar-menu-actions">
|
451
|
+
<span class="view-all"><a href="http://twitter.com/twitter/lists">View all</a></span>
|
452
|
+
</p>
|
453
|
+
</div>
|
454
|
+
|
455
|
+
|
456
|
+
<hr/>
|
457
|
+
|
458
|
+
|
459
|
+
<div id="following">
|
460
|
+
<h2 class="sidebar-title" id="fm_menu"><span>Following</span></h2>
|
461
|
+
|
462
|
+
<div class="sidebar-menu">
|
463
|
+
<div id="following_list">
|
464
|
+
|
465
|
+
<span class="vcard">
|
466
|
+
<a href="/troyholden" class="url" hreflang="en" rel="contact" title="troyholden"><img alt="troyholden" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/471907441/4002551589_23daaeeca3_mini.jpg" width="24" /></a> </span>
|
467
|
+
|
468
|
+
|
469
|
+
<span class="vcard">
|
470
|
+
<a href="/twitter_fr" class="url" hreflang="en" rel="contact" title="Twitter Français"><img alt="Twitter Français" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/534477089/twitter_bird_profile_mini.png" width="24" /></a> </span>
|
471
|
+
|
472
|
+
|
473
|
+
<span class="vcard">
|
474
|
+
<a href="/jreichhold" class="url" hreflang="en" rel="contact" title="jreichhold"><img alt="jreichhold" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/54857067/Photo_1_mini.jpg" width="24" /></a> </span>
|
475
|
+
|
476
|
+
|
477
|
+
<span class="vcard">
|
478
|
+
<a href="/imownbey" class="url" hreflang="en" rel="contact" title="Ian Miles Ownbey"><img alt="Ian Miles Ownbey" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/64790496/Photo_2_mini.jpg" width="24" /></a> </span>
|
479
|
+
|
480
|
+
|
481
|
+
<span class="vcard">
|
482
|
+
<a href="/dongwang218" class="url" hreflang="en" rel="contact" title="Dong Wang"><img alt="Dong Wang" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/382072008/dong_mini.jpg" width="24" /></a> </span>
|
483
|
+
|
484
|
+
|
485
|
+
<span class="vcard">
|
486
|
+
<a href="/bsuto" class="url" hreflang="en" rel="contact" title="Brian Sutorius"><img alt="Brian Sutorius" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/452951844/Screen_shot_2009-10-04_at_3.39.37_PM_mini.png" width="24" /></a> </span>
|
487
|
+
|
488
|
+
|
489
|
+
<span class="vcard">
|
490
|
+
<a href="/BFF" class="url" hreflang="en" rel="contact" title="Brandi"><img alt="Brandi" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/511212714/care-bears_best-friends_mini.jpg" width="24" /></a> </span>
|
491
|
+
|
492
|
+
|
493
|
+
<span class="vcard">
|
494
|
+
<a href="/dino" class="url" hreflang="en" rel="contact" title="Dino"><img alt="Dino" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/508083749/Photo_2_mini.jpg" width="24" /></a> </span>
|
495
|
+
|
496
|
+
|
497
|
+
<span class="vcard">
|
498
|
+
<a href="/francesca" class="url" hreflang="en" rel="contact" title="Francesca"><img alt="Francesca" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/115081740/Singlephoto_mini.jpg" width="24" /></a> </span>
|
499
|
+
|
500
|
+
|
501
|
+
<span class="vcard">
|
502
|
+
<a href="/th" class="url" hreflang="en" rel="contact" title="taylor harwin"><img alt="taylor harwin" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/492316898/IMG_0058_mini.JPG" width="24" /></a> </span>
|
503
|
+
|
504
|
+
|
505
|
+
<span class="vcard">
|
506
|
+
<a href="/taylorharwin" class="url" hreflang="fr" rel="contact" title="Taylor Harwin"><img alt="Taylor Harwin" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/475719652/2847_626906472242_105819_36830072_2929508_n_mini.jpg" width="24" /></a> </span>
|
507
|
+
|
508
|
+
|
509
|
+
<span class="vcard">
|
510
|
+
<a href="/mischahere" class="url" hreflang="en" rel="contact" title="Mischa Nachtigal"><img alt="Mischa Nachtigal" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/435895728/waldo_mini.jpg" width="24" /></a> </span>
|
511
|
+
|
512
|
+
|
513
|
+
<span class="vcard">
|
514
|
+
<a href="/cayley" class="url" hreflang="en" rel="contact" title="Cayley Torgeson"><img alt="Cayley Torgeson" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/446923096/IMG_0072_mini.jpg" width="24" /></a> </span>
|
515
|
+
|
516
|
+
|
517
|
+
<span class="vcard">
|
518
|
+
<a href="/meetutkarsh" class="url" hreflang="en" rel="contact" title="Utkarsh Srivastava"><img alt="Utkarsh Srivastava" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/193389882/IMG_1906_mini.JPG" width="24" /></a> </span>
|
519
|
+
|
520
|
+
|
521
|
+
<span class="vcard">
|
522
|
+
<a href="/twitter_es" class="url" hreflang="en" rel="contact" title="Twitter Español"><img alt="Twitter Español" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/504883319/twitter_bird_profile_mini.png" width="24" /></a> </span>
|
523
|
+
|
524
|
+
|
525
|
+
<span class="vcard">
|
526
|
+
<a href="/rion" class="url" hreflang="en" rel="contact" title="Rion"><img alt="Rion" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/182109900/Photo_8_mini.jpg" width="24" /></a> </span>
|
527
|
+
|
528
|
+
|
529
|
+
<span class="vcard">
|
530
|
+
<a href="/nancyjconnery" class="url" hreflang="en" rel="contact" title="Nancy Connery"><img alt="Nancy Connery" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/93080012/Mommy_and_Morgan_mini.jpg" width="24" /></a> </span>
|
531
|
+
|
532
|
+
|
533
|
+
<span class="vcard">
|
534
|
+
<a href="/tamtam2" class="url" hreflang="en" rel="contact" title="tamtam2"><img alt="tamtam2" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/458987118/Taimay_Red_Outside_2_half_mini.jpg" width="24" /></a> </span>
|
535
|
+
|
536
|
+
|
537
|
+
<span class="vcard">
|
538
|
+
<a href="/em33" class="url" hreflang="en" rel="contact" title="Emee"><img alt="Emee" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/61480913/em_mini.PNG" width="24" /></a> </span>
|
539
|
+
|
540
|
+
|
541
|
+
<span class="vcard">
|
542
|
+
<a href="/andr8a" class="url" hreflang="en" rel="contact" title="andr8a"><img alt="andr8a" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/81960525/kitty_mini.jpg" width="24" /></a> </span>
|
543
|
+
|
544
|
+
|
545
|
+
<span class="vcard">
|
546
|
+
<a href="/keerthi" class="url" hreflang="en" rel="contact" title="Keerthi Prakash"><img alt="Keerthi Prakash" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/104888409/keerthi_2_2__mini.jpg" width="24" /></a> </span>
|
547
|
+
|
548
|
+
|
549
|
+
<span class="vcard">
|
550
|
+
<a href="/Lukester" class="url" hreflang="en" rel="contact" title="Luke "><img alt="Luke " class="photo fn" height="24" src="http://a1.twimg.com/profile_images/543545486/image_mini.jpg" width="24" /></a> </span>
|
551
|
+
|
552
|
+
|
553
|
+
<span class="vcard">
|
554
|
+
<a href="/sean" class="url" hreflang="ja" rel="contact" title="Sean"><img alt="Sean" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/468339255/645882267_8CRyH-L_mini.jpg" width="24" /></a> </span>
|
555
|
+
|
556
|
+
|
557
|
+
<span class="vcard">
|
558
|
+
<a href="/che" class="url" hreflang="en" rel="contact" title="Cheryl Palarca"><img alt="Cheryl Palarca" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/399472616/Picture_2a_mini.jpg" width="24" /></a> </span>
|
559
|
+
|
560
|
+
|
561
|
+
<span class="vcard">
|
562
|
+
<a href="/lg" class="url" hreflang="en" rel="contact" title="Larry Gadea"><img alt="Larry Gadea" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/53283340/q90406211_4464_mini.jpg" width="24" /></a> </span>
|
563
|
+
|
564
|
+
|
565
|
+
<span class="vcard">
|
566
|
+
<a href="/tiger" class="url" hreflang="en" rel="contact" title="Emily"><img alt="Emily" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/257293259/724px-Tigerente_mini.jpg" width="24" /></a> </span>
|
567
|
+
|
568
|
+
|
569
|
+
<span class="vcard">
|
570
|
+
<a href="/mattknox" class="url" hreflang="en" rel="contact" title="matt knox"><img alt="matt knox" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/257712871/n14602342_30779909_9881_mini.jpg" width="24" /></a> </span>
|
571
|
+
|
572
|
+
|
573
|
+
<span class="vcard">
|
574
|
+
<a href="/Magnuson" class="url" hreflang="en" rel="contact" title="Charles Magnuson"><img alt="Charles Magnuson" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/371840833/twitter_pic_mini.jpg" width="24" /></a> </span>
|
575
|
+
|
576
|
+
|
577
|
+
<span class="vcard">
|
578
|
+
<a href="/Charles" class="url" hreflang="en" rel="contact" title="Charles"><img alt="Charles" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/529789365/4099673652_248e60ccf5_mini.jpg" width="24" /></a> </span>
|
579
|
+
|
580
|
+
|
581
|
+
<span class="vcard">
|
582
|
+
<a href="/rsarver" class="url" hreflang="en" rel="contact" title="Ryan Sarver"><img alt="Ryan Sarver" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/53700173/2448434960_65aba38823_t_mini.jpg" width="24" /></a> </span>
|
583
|
+
|
584
|
+
|
585
|
+
<span class="vcard">
|
586
|
+
<a href="/ElizaSwan" class="url" hreflang="en" rel="contact" title="Robin"><img alt="Robin" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/260125397/icon_mini.png" width="24" /></a> </span>
|
587
|
+
|
588
|
+
|
589
|
+
<span class="vcard">
|
590
|
+
<a href="/sam" class="url" hreflang="en" rel="contact" title="Sam Luckenbill"><img alt="Sam Luckenbill" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/65334155/601879580_479440f611_mini.jpg" width="24" /></a> </span>
|
591
|
+
|
592
|
+
|
593
|
+
<span class="vcard">
|
594
|
+
<a href="/noradio" class="url" hreflang="en" rel="contact" title="Marcel Molina"><img alt="Marcel Molina" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/53473799/marcel-euro-rails-conf_mini.jpg" width="24" /></a> </span>
|
595
|
+
|
596
|
+
|
597
|
+
<span class="vcard">
|
598
|
+
<a href="/bakineggs" class="url" hreflang="en" rel="contact" title="Dan Barry"><img alt="Dan Barry" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/59318058/n30408690_33614537_9476_square_mini.jpg" width="24" /></a> </span>
|
599
|
+
|
600
|
+
|
601
|
+
<span class="vcard">
|
602
|
+
<a href="/emaland" class="url" hreflang="en" rel="contact" title="emaland"><img alt="emaland" class="photo fn" height="24" src="http://a3.twimg.com/profile_images/68049269/n707915915_121_mini.jpg" width="24" /></a> </span>
|
603
|
+
|
604
|
+
|
605
|
+
<span class="vcard">
|
606
|
+
<a href="/ablegrape" class="url" hreflang="en" rel="contact" title="Doug Cook"><img alt="Doug Cook" class="photo fn" height="24" src="http://a1.twimg.com/profile_images/76892852/sticker2_mini.jpg" width="24" /></a> </span>
|
607
|
+
|
608
|
+
|
609
|
+
</div>
|
610
|
+
<div id="friends_view_all">
|
611
|
+
<a href="/twitter/following" rel="me">View all…</a>
|
612
|
+
</div>
|
613
|
+
|
614
|
+
</div>
|
615
|
+
|
616
|
+
|
617
|
+
</div>
|
618
|
+
|
619
|
+
|
620
|
+
|
621
|
+
|
622
|
+
<div id="rssfeed">
|
623
|
+
<hr/>
|
624
|
+
<a href="/statuses/user_timeline/783214.rss" class="xref rss profile-rss" rel="alternate" type="application/rss+xml">RSS feed of twitter's tweets</a>
|
625
|
+
<a href="/favorites/783214.rss" class="xref rss favorites-rss" rel="alternate" type="application/rss+xml">RSS feed of twitter's favorites</a>
|
626
|
+
</div>
|
627
|
+
|
628
|
+
|
629
|
+
</div>
|
630
|
+
</td>
|
631
|
+
|
632
|
+
</tr>
|
633
|
+
</tbody>
|
634
|
+
</table>
|
635
|
+
|
636
|
+
|
637
|
+
|
638
|
+
<div id="footer"
|
639
|
+
class="round">
|
640
|
+
<h3 class="offscreen">Footer</h3>
|
641
|
+
|
642
|
+
|
643
|
+
<ul>
|
644
|
+
<li class="first">© 2009 Twitter</li>
|
645
|
+
<li><a href="/about#about">About Us</a></li>
|
646
|
+
<li><a href="/about#contact">Contact</a></li>
|
647
|
+
<li><a href="http://blog.twitter.com">Blog</a></li>
|
648
|
+
<li><a href="http://status.twitter.com">Status</a></li>
|
649
|
+
<li><a href="/goodies">Goodies</a></li>
|
650
|
+
<li><a href="http://apiwiki.twitter.com/">API</a></li>
|
651
|
+
<li><a href="http://business.twitter.com/twitter101">Business</a></li>
|
652
|
+
<li><a href="http://help.twitter.com">Help</a></li>
|
653
|
+
<li><a href="/jobs">Jobs</a></li>
|
654
|
+
<li><a href="/tos">Terms</a></li>
|
655
|
+
<li><a href="/privacy">Privacy</a></li>
|
656
|
+
</ul>
|
657
|
+
</div>
|
658
|
+
|
659
|
+
|
660
|
+
|
661
|
+
<hr />
|
662
|
+
|
663
|
+
</div>
|
664
|
+
|
665
|
+
|
666
|
+
|
667
|
+
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.0/jquery.min.js" type="text/javascript"></script>
|
668
|
+
<script src="http://a0.twimg.com/a/1259091217/javascripts/twitter.js?1259117012" type="text/javascript"></script>
|
669
|
+
<script src="http://a2.twimg.com/a/1259091217/javascripts/lib/jquery.tipsy.min.js?1259117012" type="text/javascript"></script>
|
670
|
+
<script type="text/javascript">
|
671
|
+
//<![CDATA[
|
672
|
+
page.user_screenname = 'twitter';
|
673
|
+
page.user_fullname = 'Twitter';
|
674
|
+
page.controller_name = 'AccountController';
|
675
|
+
page.action_name = 'profile';
|
676
|
+
twttr.form_authenticity_token = '7e394453cc9d849cab133ccb3180e8ae4e6a0258';
|
677
|
+
// FIXME: Reconcile with the kinds on the Status model.
|
678
|
+
twttr.statusKinds = {
|
679
|
+
UPDATE: 1,
|
680
|
+
SHARE: 2
|
681
|
+
};
|
682
|
+
twttr.ListPerUserLimit = 20;
|
683
|
+
|
684
|
+
|
685
|
+
//]]>
|
686
|
+
</script>
|
687
|
+
<script type="text/javascript">
|
688
|
+
//<![CDATA[
|
689
|
+
|
690
|
+
$( function () {
|
691
|
+
initializePage();
|
692
|
+
|
693
|
+
});
|
694
|
+
|
695
|
+
//]]>
|
696
|
+
</script>
|
697
|
+
|
698
|
+
<!-- BEGIN google analytics -->
|
699
|
+
|
700
|
+
<script type="text/javascript">
|
701
|
+
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
702
|
+
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
703
|
+
</script>
|
704
|
+
|
705
|
+
<script type="text/javascript">
|
706
|
+
|
707
|
+
try {
|
708
|
+
var pageTracker = _gat._getTracker("UA-30775-6");
|
709
|
+
pageTracker._setDomainName("twitter.com");
|
710
|
+
pageTracker._setVar('Not Logged In');
|
711
|
+
pageTracker._setVar('lang: en');
|
712
|
+
pageTracker._initData();
|
713
|
+
pageTracker._trackPageview('/profile/not_logged_in/twitter');
|
714
|
+
} catch(err) { }
|
715
|
+
|
716
|
+
</script>
|
717
|
+
|
718
|
+
<!-- END google analytics -->
|
719
|
+
|
720
|
+
|
721
|
+
|
722
|
+
|
723
|
+
|
724
|
+
<div id="notifications"></div>
|
725
|
+
|
726
|
+
|
727
|
+
|
728
|
+
<!-- ERB -->
|
729
|
+
</body>
|
730
|
+
|
731
|
+
</html>
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
|
2
|
+
|
3
|
+
describe Scraper do
|
4
|
+
before :all do
|
5
|
+
@uri = File.expand_path(File.join(File.dirname(__FILE__), '..', 'data', 'twitter.html' ))
|
6
|
+
@pattern = ".status"
|
7
|
+
class Entry < Object
|
8
|
+
attr_accessor :text, :date
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
before :each do
|
13
|
+
Scraper.scrapers = {}
|
14
|
+
end
|
15
|
+
|
16
|
+
describe "defining a scraper" do
|
17
|
+
it "should create a new scraper using the specified name" do
|
18
|
+
Scraper.define(:s, :class => :entry, :source => @uri, :node => @pattern) {}
|
19
|
+
|
20
|
+
Scraper(:s).scraper_source.should == @uri
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should be set with and without block" do
|
24
|
+
Scraper.define(:s, :class => :entry, :source => @uri, :node => @pattern) do |s|
|
25
|
+
s.text "foo"
|
26
|
+
s.date {"bar"}
|
27
|
+
end
|
28
|
+
|
29
|
+
@objects = Scraper.parse(:s)
|
30
|
+
@objects.first.text.should == "foo"
|
31
|
+
@objects.first.date.should == "bar"
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should get the objects from twitter" do
|
35
|
+
Scraper.define(:twitter, :class => :entry, :source => @uri, :node => @pattern) do |s|
|
36
|
+
s.text { |node| node.at(".entry-content").inner_html }
|
37
|
+
s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
|
38
|
+
end
|
39
|
+
|
40
|
+
@objects = Scraper.parse(:twitter)
|
41
|
+
@objects.size.should == 20
|
42
|
+
@objects.first.text.should == "SMS delivery issues on AT&T <a href=\"http://bit.ly/7JFJ6H\" class=\"tweet-url web\" rel=\"nofollow\" target=\"_blank\">http://bit.ly/7JFJ6H</a>"
|
43
|
+
@objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should use a different html parser" do
|
47
|
+
require 'nokogiri'
|
48
|
+
Scraper.scrape_source_with = Proc.new { |source| Nokogiri::HTML(source) }
|
49
|
+
|
50
|
+
Scraper.define(:twitter, :class => :entry, :source => @uri, :node => @pattern) do |s|
|
51
|
+
s.text { |node| node.at(".entry-content").inner_html }
|
52
|
+
s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
|
53
|
+
end
|
54
|
+
|
55
|
+
@objects = Scraper.parse(:twitter)
|
56
|
+
@objects.size.should == 20
|
57
|
+
@objects.first.text.should == "SMS delivery issues on AT&T <a href=\"http://bit.ly/7JFJ6H\" class=\"tweet-url web\" rel=\"nofollow\" target=\"_blank\">http://bit.ly/7JFJ6H</a>"
|
58
|
+
@objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
end
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: object-scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Enrico Genauck
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-10 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.8.2
|
24
|
+
version:
|
25
|
+
description: Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.
|
26
|
+
email: kontakt@enricogenauck.de
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README.rdoc
|
33
|
+
- lib/object-scraper.rb
|
34
|
+
- lib/object-scraper/scraper.rb
|
35
|
+
files:
|
36
|
+
- Manifest
|
37
|
+
- README.rdoc
|
38
|
+
- Rakefile
|
39
|
+
- lib/object-scraper.rb
|
40
|
+
- lib/object-scraper/scraper.rb
|
41
|
+
- object-scraper.gemspec
|
42
|
+
- spec/data/twitter.html
|
43
|
+
- spec/object-scraper/scraper_spec.rb
|
44
|
+
- spec/spec.opts
|
45
|
+
- spec/spec_helper.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://github.com/enricogenauck/object-scraper
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options:
|
52
|
+
- --line-numbers
|
53
|
+
- --inline-source
|
54
|
+
- --title
|
55
|
+
- Object-scraper
|
56
|
+
- --main
|
57
|
+
- README.rdoc
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "1.2"
|
71
|
+
version:
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project: object-scraper
|
75
|
+
rubygems_version: 1.3.5
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Recipe like object extraction from HTML sources
|
79
|
+
test_files: []
|
80
|
+
|