upton 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/upton.rb +86 -116
- data/lib/upton/downloader.rb +126 -0
- data/lib/upton/utils.rb +43 -0
- data/spec/data/propublica.html +269 -269
- data/spec/data/propublica_search.html +388 -0
- data/spec/data/propublica_search_page_2.html +375 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/upton_downloader_spec.rb +75 -0
- data/spec/upton_spec.rb +110 -47
- metadata +26 -3
- data/lib/utils.rb +0 -74
@@ -0,0 +1,388 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
<!DOCTYPE html>
|
5
|
+
|
6
|
+
<html lang="en" xmlns:fb="http://www.facebook.com/2008/fbml">
|
7
|
+
<head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article: http://ogp.me/ns/article#">
|
8
|
+
<meta http-equiv="X-UA-Compatible" content="IE=Edge;chrome=1">
|
9
|
+
|
10
|
+
|
11
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
12
|
+
|
13
|
+
<!-- ProPublica Typekit account -->
|
14
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/typekit.js?1349797800"></script>
|
15
|
+
|
16
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/all.js"></script>
|
17
|
+
|
18
|
+
|
19
|
+
<script type="text/javascript">var _sf_startpt=(new Date()).getTime()</script>
|
20
|
+
<meta name="google-site-verification" content="xPf4zxBdgQHZIRLVMiZyNYR1igMQUhsHogunEnOtD1k" />
|
21
|
+
|
22
|
+
<link rel="stylesheet" href="//www.propublica.org/css/960/reset.css?2012101501" type="text/css" media="all" charset="utf-8" />
|
23
|
+
<link rel="stylesheet" href="//www.propublica.org/css/960/propublica-text.css?2012101501" type="text/css" media="all" charset="utf-8" />
|
24
|
+
<link rel="stylesheet" href="//www.propublica.org/css/master.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
25
|
+
|
26
|
+
<link rel="stylesheet" href="//www.propublica.org/css/print.css?2012101501" type="text/css" media="print" charset="utf-8" />
|
27
|
+
|
28
|
+
<link rel="stylesheet" href="//www.propublica.org/css/headfoot2011.css?2012101501" type="text/css" media="all" charset="utf-8">
|
29
|
+
|
30
|
+
<!-- adaptive -->
|
31
|
+
<meta name="viewport" content="width=device-width">
|
32
|
+
<link rel="stylesheet" type="text/css" media="screen and (max-width: 480px)" href="//www.propublica.org/css/woland.css?2012101501" charset="utf-8">
|
33
|
+
<link rel="stylesheet" type="text/css" href="//www.propublica.org/css/zoom-fix.css" charset="UTF-8">
|
34
|
+
<!-- <link rel="stylesheet" type="text/css" media="screen and (max-width: 800px) and (min-width: 481px)" href="//www.propublica.org/css/behemoth.css?2012101501" charset="utf-8" /> -->
|
35
|
+
<!-- /adaptive -->
|
36
|
+
|
37
|
+
<!--[if IE]>
|
38
|
+
<link rel="stylesheet" href="//www.propublica.org/css/ie.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
39
|
+
<![endif]-->
|
40
|
+
<!--[if IE 7]>
|
41
|
+
<link rel="stylesheet" href="//www.propublica.org/css/ie7.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
42
|
+
<link rel="stylesheet" href="//www.propublica.org/css/ie7-2011.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
43
|
+
<![endif]-->
|
44
|
+
<!--[if lt IE 7]>
|
45
|
+
<link rel="stylesheet" href="//www.propublica.org/css/ie6.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
46
|
+
<script src="//www.propublica.org/js/DD_belatedPNG_0.0.8a-min.js" type="text/javascript" charset="utf-8"></script>
|
47
|
+
<script src="//www.propublica.org/js/pngfix.js type="text/javascript" charset="utf-8"></script>
|
48
|
+
<![endif]-->
|
49
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/google_ads_boot.js"></script>
|
50
|
+
|
51
|
+
<title>ProPublica | Search results for tools</title>
|
52
|
+
</head>
|
53
|
+
<body class="search-results">
|
54
|
+
|
55
|
+
<title>ProPublica</title>
|
56
|
+
</head>
|
57
|
+
<body class="search-results old-page-new-wrapper">
|
58
|
+
|
59
|
+
<header class="shell">
|
60
|
+
<a href="/" id="dorothy" class="wNarrow"></a>
|
61
|
+
<aside id="header-donate-mobile" class="wMed"><h1><a href="https://www.propublica.org/mobile/donate">Donate</a></h1></aside>
|
62
|
+
<div id="h-wafer1">
|
63
|
+
<div class="wrapper">
|
64
|
+
<nav id="dont-miss">
|
65
|
+
<div class="wrapper unwrapper">
|
66
|
+
</div>
|
67
|
+
</nav>
|
68
|
+
</div><!-- /wrapper -->
|
69
|
+
</div><!-- /h-wafer-1 -->
|
70
|
+
<div id="h-wafer2">
|
71
|
+
<div class="wrapper">
|
72
|
+
<div class="donate">
|
73
|
+
<h1><a href="https://propublica.org/site/donate/">Donate</a></h1>
|
74
|
+
</div>
|
75
|
+
<div id="seo-hgroup">
|
76
|
+
<p><a href="/" class="pngfix">ProPublica</a></p>
|
77
|
+
<h2>Journalism in the Public Interest</h2>
|
78
|
+
</div>
|
79
|
+
<form class="has-xid email-sub" id='freeform' method="post" action="http://www.propublica.org/" >
|
80
|
+
<div class='hiddenFields'>
|
81
|
+
<input type="hidden" name="ACT" value="40" />
|
82
|
+
<input type="hidden" name="URI" value="/rails/header2-search_2011" />
|
83
|
+
<input type="hidden" name="status" value="open" />
|
84
|
+
<input type="hidden" name="return" value="/forms/thank_you" />
|
85
|
+
<input type="hidden" name="redirect_on_duplicate" value="" />
|
86
|
+
<input type="hidden" name="dynamic_xid" value="y" />
|
87
|
+
<input type="hidden" name="disable_xid" value="y" />
|
88
|
+
<input type="hidden" name="tracker_email_one" value="" />
|
89
|
+
<input type="hidden" name="tracker_template_one" value="" />
|
90
|
+
<input type="hidden" name="RET" value="http://www.propublica.org/rails/header2-search_2011/" />
|
91
|
+
<input type="hidden" name="form_name" value="daily_email_form" />
|
92
|
+
<input type="hidden" name="ajax_request" value="n" />
|
93
|
+
<input type="hidden" name="params_id" value="79668560" />
|
94
|
+
<input type="hidden" name="site_id" value="1" />
|
95
|
+
</div>
|
96
|
+
|
97
|
+
|
98
|
+
<input type='hidden' value='1' name='subscribe_daily_email' /><!-- .banner_2011 hi -->
|
99
|
+
<img src="//s3.amazonaws.com/cdn.propublica.net/deploy/images/common/icon-mail-28.png">
|
100
|
+
<label for="email">Receive our top stories daily</label>
|
101
|
+
<input type="email" name="email" placeholder="Email address">
|
102
|
+
<input type="submit" value="Subscribe">
|
103
|
+
<div class="popper">
|
104
|
+
<input type="text" name="postalcode" placeholder="Zip-code">
|
105
|
+
<label for="zipcode">optional</label>
|
106
|
+
</div>
|
107
|
+
</form>
|
108
|
+
</div><!-- /wrapper -->
|
109
|
+
</div><!-- /h-wafer-2 -->
|
110
|
+
|
111
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/google_ads_boot.js?2012101501" charset="utf-8"></script>
|
112
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/google_ads.js?2012101501" charset="utf-8"></script>
|
113
|
+
|
114
|
+
<div id="h-wafer3">
|
115
|
+
<div class="wrapper">
|
116
|
+
<nav>
|
117
|
+
<a href="http://www.propublica.org/" >Home</a>
|
118
|
+
<a href="http://www.propublica.org/investigations/" ><span class="wFull">Our Investigations</span><span class="wNarrow">Projects</span></a>
|
119
|
+
<a href="http://www.propublica.org/tools/" class="wMed wFull ">Tools & Data</a>
|
120
|
+
<a href="http://www.propublica.org/muckreads" class="wMed wFull ">MuckReads</a>
|
121
|
+
<a href="http://www.propublica.org/getinvolved" >Get Involved</a>
|
122
|
+
<a href="http://www.propublica.org/about/" class=""><span class="wFull">About Us</span><span class="wNarrow">About</span></a>
|
123
|
+
</nav>
|
124
|
+
<aside class="fb">
|
125
|
+
<img src="//s3.amazonaws.com/cdn.propublica.net/deploy/images/common/icon-fb-top-nav.png">
|
126
|
+
<div class="jspopper">
|
127
|
+
|
128
|
+
<iframe src="//www.facebook.com/plugins/likebox.php?href=http%3A%2F%2Fwww.facebook.com%2Fpropublica&width=292&colorscheme=light&show_faces=true&border_color=%23 525E6A&stream=true&header=true&height=590" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:292px; height:590px; background: #ffffff" allowTransparency="false"></iframe>
|
129
|
+
|
130
|
+
|
131
|
+
</div>
|
132
|
+
</aside>
|
133
|
+
<aside class="tw">
|
134
|
+
<img src="//s3.amazonaws.com/cdn.propublica.net/deploy/images/common/icon-tw-top-nav.png">
|
135
|
+
<div class="popper">
|
136
|
+
|
137
|
+
<h1>ProPublica on Twitter</h1>
|
138
|
+
|
139
|
+
<a class="twitter-timeline" width="285" height="350" data-dnt=true href="https://twitter.com/ProPublica" data-widget-id="250304934812205056">Tweets by @ProPublica</a>
|
140
|
+
<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0];if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src="//platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
|
141
|
+
|
142
|
+
|
143
|
+
</div>
|
144
|
+
</aside>
|
145
|
+
<div id="search-toggler">
|
146
|
+
<div class="wNarrow search-toggle" id="search-glass"></div>
|
147
|
+
<form action="//www.propublica.org/search/search.php" method="get">
|
148
|
+
<input type="search" placeholder="Search ProPublica" name="q">
|
149
|
+
<input type="image" src="//s3.amazonaws.com/cdn.propublica.net/deploy/images/common/icn-search.png">
|
150
|
+
<div class="wNarrow btn-ios search-toggle">Cancel</div>
|
151
|
+
</form>
|
152
|
+
</div>
|
153
|
+
</div><!-- /wrapper -->
|
154
|
+
</div><!-- /h-wafer-3 -->
|
155
|
+
</header>
|
156
|
+
|
157
|
+
<!-- Top_Pushdown -->
|
158
|
+
<div id='google-Top_Pushdown' style="z-index: 3;" >
|
159
|
+
<script type='text/javascript'>
|
160
|
+
googletag.display('google-Top_Pushdown');
|
161
|
+
</script>
|
162
|
+
</div>
|
163
|
+
|
164
|
+
<div id="content-header">
|
165
|
+
<div class="wrapper">
|
166
|
+
<div class="content-header-text">
|
167
|
+
<h1 class="title-link">
|
168
|
+
Search Results
|
169
|
+
<span class="wNarrow">for <span class="search-term">tools</span></span>
|
170
|
+
</h1>
|
171
|
+
</div>
|
172
|
+
</div><!--/.wrapper-->
|
173
|
+
</div><!--/.content-header-->
|
174
|
+
<div id="content">
|
175
|
+
<div class="wrapper">
|
176
|
+
<div class="content-main">
|
177
|
+
<form action="" class="simple-search">
|
178
|
+
<input type="text" class="text-input" name="q" value="tools">
|
179
|
+
<input type="submit" class="btn-input" value="Search" />
|
180
|
+
</form>
|
181
|
+
|
182
|
+
|
183
|
+
<div class="compact-list-header utility-text">
|
184
|
+
<p class="results-count">
|
185
|
+
1-15 of <strong>22700</strong> results
|
186
|
+
</p>
|
187
|
+
|
188
|
+
</div>
|
189
|
+
|
190
|
+
<div class="compact-list">
|
191
|
+
<div class="compact-list-item">
|
192
|
+
<h2><a href="webinar.html" class="title-link"><b>Tools</b> & Data </a><!-- --></h2>
|
193
|
+
<p><b>Tools</b> & Data. ProPublica's News Applications, Graphics, Databases, and <b>Tools</b>. <b>....</b> Use this <b>tool</b> to find and compare doctors and other top prescribers in 2010.</p>
|
194
|
+
</div> <div class="compact-list-item">
|
195
|
+
<h2><a href="prosecutor.html" class="title-link"><b>Tools</b> and Resources for Reporting the Stimulus </a><!-- --></h2>
|
196
|
+
<p>Jul 13, 2009 … Here's some tips of how to keep track of how federal stimulus dollars are being spent in your community.</p>
|
197
|
+
</div> </div><!--/.results-list-->
|
198
|
+
|
199
|
+
|
200
|
+
|
201
|
+
<div class="stories">
|
202
|
+
<ul>
|
203
|
+
|
204
|
+
</ul>
|
205
|
+
</div>
|
206
|
+
<p><a href="http://www.propublica.org/tag/data+tools">More ...</a></p>
|
207
|
+
</div> -->
|
208
|
+
</div>
|
209
|
+
</div><!--/.results-list-->
|
210
|
+
|
211
|
+
<div class="pagination">
|
212
|
+
<div class="pagination-menu">
|
213
|
+
|
214
|
+
<!-- <li><a href="#">« prev</a></li> -->
|
215
|
+
|
216
|
+
|
217
|
+
<strong>1</strong>
|
218
|
+
|
219
|
+
<a href="./search.php?q=tools&o=r&p=2">2</a>
|
220
|
+
<a href="./search.php?q=tools&o=r&p=3">3</a>
|
221
|
+
<a href="./search.php?q=tools&o=r&p=4">4</a>
|
222
|
+
<a href="./search.php?q=tools&o=r&p=5">5</a>
|
223
|
+
<a href="./search.php?q=tools&o=r&p=6">6</a>
|
224
|
+
<a href="./search.php?q=tools&o=r&p=7">7</a>
|
225
|
+
|
226
|
+
<!-- <li><a href="#">next »</a></li> -->
|
227
|
+
</div>
|
228
|
+
</div>
|
229
|
+
|
230
|
+
<!-- Page 1 of 1514 -->
|
231
|
+
|
232
|
+
|
233
|
+
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
|
240
|
+
</div><!--/.content-main-->
|
241
|
+
|
242
|
+
<div class="content-right">
|
243
|
+
<div class="updates-module module">
|
244
|
+
<div class="module-top module-content">
|
245
|
+
<h3>Get Updates</h3>
|
246
|
+
</div>
|
247
|
+
<div class="module-btm module-content">
|
248
|
+
<form class="has-xid" id='freeform' method="post" action="http://www.propublica.org/" >
|
249
|
+
<div class='hiddenFields'>
|
250
|
+
<input type="hidden" name="ACT" value="40" />
|
251
|
+
<input type="hidden" name="URI" value="/rails/search_sidebar" />
|
252
|
+
<input type="hidden" name="status" value="open" />
|
253
|
+
<input type="hidden" name="return" value="/forms/thank_you" />
|
254
|
+
<input type="hidden" name="redirect_on_duplicate" value="" />
|
255
|
+
<input type="hidden" name="dynamic_xid" value="y" />
|
256
|
+
<input type="hidden" name="tracker_email_one" value="" />
|
257
|
+
<input type="hidden" name="tracker_template_one" value="" />
|
258
|
+
<input type="hidden" name="RET" value="http://www.propublica.org/rails/search_sidebar/" />
|
259
|
+
<input type="hidden" name="form_name" value="daily_email_form" />
|
260
|
+
<input type="hidden" name="ajax_request" value="y" />
|
261
|
+
<input type="hidden" name="params_id" value="79668565" />
|
262
|
+
<input type="hidden" name="site_id" value="1" />
|
263
|
+
</div>
|
264
|
+
|
265
|
+
|
266
|
+
<input type='hidden' value='1' name='subscribe_daily_email' />
|
267
|
+
<p>Stay on top of what we’re working on by subscribing to our email digest.</p>
|
268
|
+
<div class="form-row text-row">
|
269
|
+
<label for="subscribe-email">email</label> <input type="text" name="email" class="text-input" id="subscribe-email" />
|
270
|
+
</div>
|
271
|
+
<div class="form-row text-row">
|
272
|
+
<label for="subscribe-zip">zip code</label> <input type="text" name="postalcode" class="text-input" id="subscribe-zip" /> <span>optional</span>
|
273
|
+
</div>
|
274
|
+
<div class="form-row buttons-row">
|
275
|
+
<input type="submit" value="Subscribe" class="btn-input" />
|
276
|
+
</div>
|
277
|
+
</form>
|
278
|
+
</div>
|
279
|
+
</div>
|
280
|
+
|
281
|
+
<div class="donate-module module">
|
282
|
+
<div class="module-top module-content">
|
283
|
+
<h3>Safeguard the public interest.</h3>
|
284
|
+
<div class="module-icon pngfix"><!-- --></div>
|
285
|
+
</div>
|
286
|
+
<div class="module-btm module-content">
|
287
|
+
<p>Support ProPublica’s award-winning investigative journalism.</p>
|
288
|
+
<p><a href="http://www.propublica.org/site/donate/" class="btn-input">Donate</a></p>
|
289
|
+
</div>
|
290
|
+
</div><!--/.donate-module-->
|
291
|
+
<div class="simple-follow-module follow-module-single module">
|
292
|
+
<div class="module-top module-content">
|
293
|
+
<h3>Follow ProPublica</h3>
|
294
|
+
<div class="module-icon pngfix"><!-- --></div>
|
295
|
+
</div>
|
296
|
+
<div class="module-btm module-content">
|
297
|
+
<ul>
|
298
|
+
<li><a href="http://www.propublica.org/forms/newsletter_daily_email" class="email-sq-link">Email</a></li>
|
299
|
+
<li><a href="http://www.facebook.com/propublica" class="fb-link">Facebook</a></li>
|
300
|
+
<li><a href="http://www.twitter.com/propublica" class="tw-link">Twitter</a></li>
|
301
|
+
</ul>
|
302
|
+
<ul>
|
303
|
+
<li><a href="http://iphone.propublica.org/" class="apple-link">iPhone App</a></li>
|
304
|
+
<li><a href="/podcast/" class="podcast-link">Podcast</a></li>
|
305
|
+
<li><a href="http://feeds.propublica.org/propublica/main" class="feed-link">RSS</a></li>
|
306
|
+
</ul>
|
307
|
+
<div class="clear"></div>
|
308
|
+
</div>
|
309
|
+
</div><!--/follow-module-->
|
310
|
+
</div><!--/.content-right-->
|
311
|
+
</div><!--/.wrapper-->
|
312
|
+
|
313
|
+
</div><!--/#content-->
|
314
|
+
|
315
|
+
|
316
|
+
<footer class="shell wFull">
|
317
|
+
<div class="wrapper">
|
318
|
+
<a href="https://www.propublica.org/site/donate" id="foot-tab-donate">Donate</a>
|
319
|
+
<section class="copyright">
|
320
|
+
<p id="footer-logo-image">ProPublica</p>
|
321
|
+
<p>
|
322
|
+
© Copyright 2013<br>
|
323
|
+
Pro Publica Inc.
|
324
|
+
</p>
|
325
|
+
</section>
|
326
|
+
<ul>
|
327
|
+
<li><a href="http://www.propublica.org/about/">About Us</a></li>
|
328
|
+
<li><a href="http://propublica.org/about/staff">Staff</a></li>
|
329
|
+
<li><a href="http://propublica.org/about/contact">Contact</a></li>
|
330
|
+
<li><a href="http://propublica.org/about/jobs">Jobs</a></li>
|
331
|
+
<li><a href="http://propublica.org/about/corrections">Complaints and Corrections</a></li>
|
332
|
+
<li><a href="http://propublica.org/about/legal">Privacy Policy and Other Terms</a></li>
|
333
|
+
<li><a href="http://www.propublica.org/about/frequently-asked-questions/">FAQ</a></li>
|
334
|
+
</ul>
|
335
|
+
<ul>
|
336
|
+
<li>Subscribe: <a href="http://www.propublica.org/forms/newsletter_daily_email">Email</a> | <a href="http://feeds.propublica.org/propublica/main">RSS</a></li>
|
337
|
+
<li>Follow us: <a href="http://www.twitter.com/propublica">Twitter</a> | <a href="http://www.facebook.com/propublica">Facebook</a></li>
|
338
|
+
<li><a href="https://twitter.com/ProPublica/propublica-staff">See staff Twitter accounts</a></li>
|
339
|
+
<li><a href="http://www.propublica.org/getinvolved/about">Get Involved @ ProPublica</a></li>
|
340
|
+
<li>Download our apps for:<br> <a href="http://iphone.propublica.org/">iPhone</a> | <a href="https://market.android.com/details?id=com.propublica">Android</a></li>
|
341
|
+
<li><a href="http://www.propublica.org/podcast/">Listen to our Podcast</a></li>
|
342
|
+
</ul>
|
343
|
+
<section class="republish">
|
344
|
+
<a href="http://www.propublica.org/about/steal-our-stories" id="foot-tab-republish">Steal Our Stories</a>
|
345
|
+
<p>Unless otherwise noted, you can republish our stories for free if you <a href="http://www.propublica.org/about/steal-our-stories">follow these rules</a>.</p>
|
346
|
+
<!-- cc_ad -->
|
347
|
+
<div id='google-cc_ad' >
|
348
|
+
<script type='text/javascript'>
|
349
|
+
googletag.display('google-cc_ad');
|
350
|
+
</script>
|
351
|
+
</div>
|
352
|
+
|
353
|
+
</section>
|
354
|
+
</div><!-- /wrapper -->
|
355
|
+
</footer>
|
356
|
+
<footer class="shell wNarrow">
|
357
|
+
<div class="wrapper">
|
358
|
+
<aside class="untoggler">
|
359
|
+
<h1>Safeguard the public interest</h1>
|
360
|
+
<div>
|
361
|
+
<p>Support ProPublica's award-winning investigative journalism.</p>
|
362
|
+
<p class="btn-row"><a href="https://www.propublica.org/mobile/donate" class="btn-ios">Donate</a></p>
|
363
|
+
</div>
|
364
|
+
</aside>
|
365
|
+
<nav>
|
366
|
+
<a href="http://www.propublica.org/about/steal-our-stories">Steal Our Stories <span class="note">follow these rules</span></a>
|
367
|
+
<a href="http://propublica.org/about/legal">Privacy Policy</a>
|
368
|
+
<a href="http://www.propublica.org/forms/newsletter_daily_email">Sign up for our email list</a>
|
369
|
+
<p>© Copyright 2013 Pro Publica Inc.</p>
|
370
|
+
</nav>
|
371
|
+
</div><!-- /wrapper -->
|
372
|
+
</footer>
|
373
|
+
|
374
|
+
|
375
|
+
|
376
|
+
|
377
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/beacons.js"></script>
|
378
|
+
|
379
|
+
|
380
|
+
<script src="http://www.propublica.org/static/fonts/ss-social.js"></script>
|
381
|
+
<script src="http://www.propublica.org/static/fonts/ss-standard.js"></script>
|
382
|
+
|
383
|
+
</body>
|
384
|
+
</html>
|
385
|
+
|
386
|
+
|
387
|
+
</body>
|
388
|
+
</html>
|
@@ -0,0 +1,375 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
<!DOCTYPE html>
|
5
|
+
|
6
|
+
<html lang="en" xmlns:fb="http://www.facebook.com/2008/fbml">
|
7
|
+
<head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article: http://ogp.me/ns/article#">
|
8
|
+
<meta http-equiv="X-UA-Compatible" content="IE=Edge;chrome=1">
|
9
|
+
|
10
|
+
|
11
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
12
|
+
|
13
|
+
<!-- ProPublica Typekit account -->
|
14
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/typekit.js?1349797800"></script>
|
15
|
+
|
16
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/all.js"></script>
|
17
|
+
|
18
|
+
|
19
|
+
<script type="text/javascript">var _sf_startpt=(new Date()).getTime()</script>
|
20
|
+
<meta name="google-site-verification" content="xPf4zxBdgQHZIRLVMiZyNYR1igMQUhsHogunEnOtD1k" />
|
21
|
+
|
22
|
+
<link rel="stylesheet" href="//www.propublica.org/css/960/reset.css?2012101501" type="text/css" media="all" charset="utf-8" />
|
23
|
+
<link rel="stylesheet" href="//www.propublica.org/css/960/propublica-text.css?2012101501" type="text/css" media="all" charset="utf-8" />
|
24
|
+
<link rel="stylesheet" href="//www.propublica.org/css/master.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
25
|
+
|
26
|
+
<link rel="stylesheet" href="//www.propublica.org/css/print.css?2012101501" type="text/css" media="print" charset="utf-8" />
|
27
|
+
|
28
|
+
<link rel="stylesheet" href="//www.propublica.org/css/headfoot2011.css?2012101501" type="text/css" media="all" charset="utf-8">
|
29
|
+
|
30
|
+
<!-- adaptive -->
|
31
|
+
<meta name="viewport" content="width=device-width">
|
32
|
+
<link rel="stylesheet" type="text/css" media="screen and (max-width: 480px)" href="//www.propublica.org/css/woland.css?2012101501" charset="utf-8">
|
33
|
+
<link rel="stylesheet" type="text/css" href="//www.propublica.org/css/zoom-fix.css" charset="UTF-8">
|
34
|
+
<!-- <link rel="stylesheet" type="text/css" media="screen and (max-width: 800px) and (min-width: 481px)" href="//www.propublica.org/css/behemoth.css?2012101501" charset="utf-8" /> -->
|
35
|
+
<!-- /adaptive -->
|
36
|
+
|
37
|
+
<!--[if IE]>
|
38
|
+
<link rel="stylesheet" href="//www.propublica.org/css/ie.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
39
|
+
<![endif]-->
|
40
|
+
<!--[if IE 7]>
|
41
|
+
<link rel="stylesheet" href="//www.propublica.org/css/ie7.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
42
|
+
<link rel="stylesheet" href="//www.propublica.org/css/ie7-2011.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
43
|
+
<![endif]-->
|
44
|
+
<!--[if lt IE 7]>
|
45
|
+
<link rel="stylesheet" href="//www.propublica.org/css/ie6.css?2012101501" type="text/css" media="screen" charset="utf-8" />
|
46
|
+
<script src="//www.propublica.org/js/DD_belatedPNG_0.0.8a-min.js" type="text/javascript" charset="utf-8"></script>
|
47
|
+
<script src="//www.propublica.org/js/pngfix.js type="text/javascript" charset="utf-8"></script>
|
48
|
+
<![endif]-->
|
49
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/google_ads_boot.js"></script>
|
50
|
+
|
51
|
+
<title>ProPublica | Search results for tools</title>
|
52
|
+
</head>
|
53
|
+
<body class="search-results">
|
54
|
+
|
55
|
+
<title>ProPublica</title>
|
56
|
+
</head>
|
57
|
+
<body class="search-results old-page-new-wrapper">
|
58
|
+
|
59
|
+
<header class="shell">
|
60
|
+
<a href="/" id="dorothy" class="wNarrow"></a>
|
61
|
+
<aside id="header-donate-mobile" class="wMed"><h1><a href="https://www.propublica.org/mobile/donate">Donate</a></h1></aside>
|
62
|
+
<div id="h-wafer1">
|
63
|
+
<div class="wrapper">
|
64
|
+
<nav id="dont-miss">
|
65
|
+
<div class="wrapper unwrapper">
|
66
|
+
</div>
|
67
|
+
</nav>
|
68
|
+
</div><!-- /wrapper -->
|
69
|
+
</div><!-- /h-wafer-1 -->
|
70
|
+
<div id="h-wafer2">
|
71
|
+
<div class="wrapper">
|
72
|
+
<div class="donate">
|
73
|
+
<h1><a href="https://propublica.org/site/donate/">Donate</a></h1>
|
74
|
+
</div>
|
75
|
+
<div id="seo-hgroup">
|
76
|
+
<p><a href="/" class="pngfix">ProPublica</a></p>
|
77
|
+
<h2>Journalism in the Public Interest</h2>
|
78
|
+
</div>
|
79
|
+
<form class="has-xid email-sub" id='freeform' method="post" action="http://www.propublica.org/" >
|
80
|
+
<div class='hiddenFields'>
|
81
|
+
<input type="hidden" name="ACT" value="40" />
|
82
|
+
<input type="hidden" name="URI" value="/rails/header2-search_2011" />
|
83
|
+
<input type="hidden" name="status" value="open" />
|
84
|
+
<input type="hidden" name="return" value="/forms/thank_you" />
|
85
|
+
<input type="hidden" name="redirect_on_duplicate" value="" />
|
86
|
+
<input type="hidden" name="dynamic_xid" value="y" />
|
87
|
+
<input type="hidden" name="disable_xid" value="y" />
|
88
|
+
<input type="hidden" name="tracker_email_one" value="" />
|
89
|
+
<input type="hidden" name="tracker_template_one" value="" />
|
90
|
+
<input type="hidden" name="RET" value="http://www.propublica.org/rails/header2-search_2011/" />
|
91
|
+
<input type="hidden" name="form_name" value="daily_email_form" />
|
92
|
+
<input type="hidden" name="ajax_request" value="n" />
|
93
|
+
<input type="hidden" name="params_id" value="79668560" />
|
94
|
+
<input type="hidden" name="site_id" value="1" />
|
95
|
+
</div>
|
96
|
+
|
97
|
+
|
98
|
+
<input type='hidden' value='1' name='subscribe_daily_email' /><!-- .banner_2011 hi -->
|
99
|
+
<img src="//s3.amazonaws.com/cdn.propublica.net/deploy/images/common/icon-mail-28.png">
|
100
|
+
<label for="email">Receive our top stories daily</label>
|
101
|
+
<input type="email" name="email" placeholder="Email address">
|
102
|
+
<input type="submit" value="Subscribe">
|
103
|
+
<div class="popper">
|
104
|
+
<input type="text" name="postalcode" placeholder="Zip-code">
|
105
|
+
<label for="zipcode">optional</label>
|
106
|
+
</div>
|
107
|
+
</form>
|
108
|
+
</div><!-- /wrapper -->
|
109
|
+
</div><!-- /h-wafer-2 -->
|
110
|
+
|
111
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/google_ads_boot.js?2012101501" charset="utf-8"></script>
|
112
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/google_ads.js?2012101501" charset="utf-8"></script>
|
113
|
+
|
114
|
+
<div id="h-wafer3">
|
115
|
+
<div class="wrapper">
|
116
|
+
<nav>
|
117
|
+
<a href="http://www.propublica.org/" >Home</a>
|
118
|
+
<a href="http://www.propublica.org/investigations/" ><span class="wFull">Our Investigations</span><span class="wNarrow">Projects</span></a>
|
119
|
+
<a href="http://www.propublica.org/tools/" class="wMed wFull ">Tools & Data</a>
|
120
|
+
<a href="http://www.propublica.org/muckreads" class="wMed wFull ">MuckReads</a>
|
121
|
+
<a href="http://www.propublica.org/getinvolved" >Get Involved</a>
|
122
|
+
<a href="http://www.propublica.org/about/" class=""><span class="wFull">About Us</span><span class="wNarrow">About</span></a>
|
123
|
+
</nav>
|
124
|
+
<aside class="fb">
|
125
|
+
<img src="//s3.amazonaws.com/cdn.propublica.net/deploy/images/common/icon-fb-top-nav.png">
|
126
|
+
<div class="jspopper">
|
127
|
+
|
128
|
+
<iframe src="//www.facebook.com/plugins/likebox.php?href=http%3A%2F%2Fwww.facebook.com%2Fpropublica&width=292&colorscheme=light&show_faces=true&border_color=%23 525E6A&stream=true&header=true&height=590" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:292px; height:590px; background: #ffffff" allowTransparency="false"></iframe>
|
129
|
+
|
130
|
+
|
131
|
+
</div>
|
132
|
+
</aside>
|
133
|
+
<aside class="tw">
|
134
|
+
<img src="//s3.amazonaws.com/cdn.propublica.net/deploy/images/common/icon-tw-top-nav.png">
|
135
|
+
<div class="popper">
|
136
|
+
|
137
|
+
<h1>ProPublica on Twitter</h1>
|
138
|
+
|
139
|
+
<a class="twitter-timeline" width="285" height="350" data-dnt=true href="https://twitter.com/ProPublica" data-widget-id="250304934812205056">Tweets by @ProPublica</a>
|
140
|
+
<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0];if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src="//platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
|
141
|
+
|
142
|
+
|
143
|
+
</div>
|
144
|
+
</aside>
|
145
|
+
<div id="search-toggler">
|
146
|
+
<div class="wNarrow search-toggle" id="search-glass"></div>
|
147
|
+
<form action="//www.propublica.org/search/search.php" method="get">
|
148
|
+
<input type="search" placeholder="Search ProPublica" name="q">
|
149
|
+
<input type="image" src="//s3.amazonaws.com/cdn.propublica.net/deploy/images/common/icn-search.png">
|
150
|
+
<div class="wNarrow btn-ios search-toggle">Cancel</div>
|
151
|
+
</form>
|
152
|
+
</div>
|
153
|
+
</div><!-- /wrapper -->
|
154
|
+
</div><!-- /h-wafer-3 -->
|
155
|
+
</header>
|
156
|
+
|
157
|
+
<!-- Top_Pushdown -->
|
158
|
+
<div id='google-Top_Pushdown' style="z-index: 3;" >
|
159
|
+
<script type='text/javascript'>
|
160
|
+
googletag.display('google-Top_Pushdown');
|
161
|
+
</script>
|
162
|
+
</div>
|
163
|
+
|
164
|
+
<div id="content-header">
|
165
|
+
<div class="wrapper">
|
166
|
+
<div class="content-header-text">
|
167
|
+
<h1 class="title-link">
|
168
|
+
Search Results
|
169
|
+
<span class="wNarrow">for <span class="search-term">tools</span></span>
|
170
|
+
</h1>
|
171
|
+
</div>
|
172
|
+
</div><!--/.wrapper-->
|
173
|
+
</div><!--/.content-header-->
|
174
|
+
<div id="content">
|
175
|
+
<div class="wrapper">
|
176
|
+
<div class="content-main">
|
177
|
+
<form action="" class="simple-search">
|
178
|
+
<input type="text" class="text-input" name="q" value="tools">
|
179
|
+
<input type="submit" class="btn-input" value="Search" />
|
180
|
+
</form>
|
181
|
+
|
182
|
+
|
183
|
+
<div class="compact-list-header utility-text">
|
184
|
+
<p class="results-count">
|
185
|
+
16-30 of <strong>22700</strong> results
|
186
|
+
</p>
|
187
|
+
|
188
|
+
</div>
|
189
|
+
|
190
|
+
<div class="compact-list">
|
191
|
+
<div class="compact-list-item">
|
192
|
+
<h2><a href="sixfacts.html" class="title-link">Chapter 1. Using Google Refine to Clean Messy Data </a><!-- --></h2>
|
193
|
+
<p>Dec 30, 2010 … Google Refine (the program formerly known as Freebase Gridworks) is described by its creators as a “power <b>tool</b> for working with messy data” …</p>
|
194
|
+
</div> </div><!--/.results-list-->
|
195
|
+
|
196
|
+
<div class="pagination">
|
197
|
+
<div class="pagination-menu">
|
198
|
+
|
199
|
+
<!-- <li><a href="#">« prev</a></li> -->
|
200
|
+
|
201
|
+
|
202
|
+
<a href="./search.php?q=tools&o=r&p=1">1</a>
|
203
|
+
|
204
|
+
<strong>2</strong>
|
205
|
+
|
206
|
+
<a href="./search.php?q=tools&o=r&p=3">3</a>
|
207
|
+
<a href="./search.php?q=tools&o=r&p=4">4</a>
|
208
|
+
<a href="./search.php?q=tools&o=r&p=5">5</a>
|
209
|
+
<a href="./search.php?q=tools&o=r&p=6">6</a>
|
210
|
+
<a href="./search.php?q=tools&o=r&p=7">7</a>
|
211
|
+
<a href="./search.php?q=tools&o=r&p=8">8</a>
|
212
|
+
|
213
|
+
<!-- <li><a href="#">next »</a></li> -->
|
214
|
+
</div>
|
215
|
+
</div>
|
216
|
+
|
217
|
+
<!-- Page 2 of 1514 -->
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
|
222
|
+
|
223
|
+
|
224
|
+
|
225
|
+
|
226
|
+
|
227
|
+
</div><!--/.content-main-->
|
228
|
+
|
229
|
+
<div class="content-right">
|
230
|
+
<div class="updates-module module">
|
231
|
+
<div class="module-top module-content">
|
232
|
+
<h3>Get Updates</h3>
|
233
|
+
</div>
|
234
|
+
<div class="module-btm module-content">
|
235
|
+
<form class="has-xid" id='freeform' method="post" action="http://www.propublica.org/" >
|
236
|
+
<div class='hiddenFields'>
|
237
|
+
<input type="hidden" name="ACT" value="40" />
|
238
|
+
<input type="hidden" name="URI" value="/rails/search_sidebar" />
|
239
|
+
<input type="hidden" name="status" value="open" />
|
240
|
+
<input type="hidden" name="return" value="/forms/thank_you" />
|
241
|
+
<input type="hidden" name="redirect_on_duplicate" value="" />
|
242
|
+
<input type="hidden" name="dynamic_xid" value="y" />
|
243
|
+
<input type="hidden" name="tracker_email_one" value="" />
|
244
|
+
<input type="hidden" name="tracker_template_one" value="" />
|
245
|
+
<input type="hidden" name="RET" value="http://www.propublica.org/rails/search_sidebar/" />
|
246
|
+
<input type="hidden" name="form_name" value="daily_email_form" />
|
247
|
+
<input type="hidden" name="ajax_request" value="y" />
|
248
|
+
<input type="hidden" name="params_id" value="79668565" />
|
249
|
+
<input type="hidden" name="site_id" value="1" />
|
250
|
+
</div>
|
251
|
+
|
252
|
+
|
253
|
+
<input type='hidden' value='1' name='subscribe_daily_email' />
|
254
|
+
<p>Stay on top of what we’re working on by subscribing to our email digest.</p>
|
255
|
+
<div class="form-row text-row">
|
256
|
+
<label for="subscribe-email">email</label> <input type="text" name="email" class="text-input" id="subscribe-email" />
|
257
|
+
</div>
|
258
|
+
<div class="form-row text-row">
|
259
|
+
<label for="subscribe-zip">zip code</label> <input type="text" name="postalcode" class="text-input" id="subscribe-zip" /> <span>optional</span>
|
260
|
+
</div>
|
261
|
+
<div class="form-row buttons-row">
|
262
|
+
<input type="submit" value="Subscribe" class="btn-input" />
|
263
|
+
</div>
|
264
|
+
</form>
|
265
|
+
</div>
|
266
|
+
</div>
|
267
|
+
|
268
|
+
<div class="donate-module module">
|
269
|
+
<div class="module-top module-content">
|
270
|
+
<h3>Safeguard the public interest.</h3>
|
271
|
+
<div class="module-icon pngfix"><!-- --></div>
|
272
|
+
</div>
|
273
|
+
<div class="module-btm module-content">
|
274
|
+
<p>Support ProPublica’s award-winning investigative journalism.</p>
|
275
|
+
<p><a href="http://www.propublica.org/site/donate/" class="btn-input">Donate</a></p>
|
276
|
+
</div>
|
277
|
+
</div><!--/.donate-module-->
|
278
|
+
<div class="simple-follow-module follow-module-single module">
|
279
|
+
<div class="module-top module-content">
|
280
|
+
<h3>Follow ProPublica</h3>
|
281
|
+
<div class="module-icon pngfix"><!-- --></div>
|
282
|
+
</div>
|
283
|
+
<div class="module-btm module-content">
|
284
|
+
<ul>
|
285
|
+
<li><a href="http://www.propublica.org/forms/newsletter_daily_email" class="email-sq-link">Email</a></li>
|
286
|
+
<li><a href="http://www.facebook.com/propublica" class="fb-link">Facebook</a></li>
|
287
|
+
<li><a href="http://www.twitter.com/propublica" class="tw-link">Twitter</a></li>
|
288
|
+
</ul>
|
289
|
+
<ul>
|
290
|
+
<li><a href="http://iphone.propublica.org/" class="apple-link">iPhone App</a></li>
|
291
|
+
<li><a href="/podcast/" class="podcast-link">Podcast</a></li>
|
292
|
+
<li><a href="http://feeds.propublica.org/propublica/main" class="feed-link">RSS</a></li>
|
293
|
+
</ul>
|
294
|
+
<div class="clear"></div>
|
295
|
+
</div>
|
296
|
+
</div><!--/follow-module-->
|
297
|
+
</div><!--/.content-right-->
|
298
|
+
</div><!--/.wrapper-->
|
299
|
+
|
300
|
+
</div><!--/#content-->
|
301
|
+
|
302
|
+
|
303
|
+
<footer class="shell wFull">
|
304
|
+
<div class="wrapper">
|
305
|
+
<a href="https://www.propublica.org/site/donate" id="foot-tab-donate">Donate</a>
|
306
|
+
<section class="copyright">
|
307
|
+
<p id="footer-logo-image">ProPublica</p>
|
308
|
+
<p>
|
309
|
+
© Copyright 2013<br>
|
310
|
+
Pro Publica Inc.
|
311
|
+
</p>
|
312
|
+
</section>
|
313
|
+
<ul>
|
314
|
+
<li><a href="http://www.propublica.org/about/">About Us</a></li>
|
315
|
+
<li><a href="http://propublica.org/about/staff">Staff</a></li>
|
316
|
+
<li><a href="http://propublica.org/about/contact">Contact</a></li>
|
317
|
+
<li><a href="http://propublica.org/about/jobs">Jobs</a></li>
|
318
|
+
<li><a href="http://propublica.org/about/corrections">Complaints and Corrections</a></li>
|
319
|
+
<li><a href="http://propublica.org/about/legal">Privacy Policy and Other Terms</a></li>
|
320
|
+
<li><a href="http://www.propublica.org/about/frequently-asked-questions/">FAQ</a></li>
|
321
|
+
</ul>
|
322
|
+
<ul>
|
323
|
+
<li>Subscribe: <a href="http://www.propublica.org/forms/newsletter_daily_email">Email</a> | <a href="http://feeds.propublica.org/propublica/main">RSS</a></li>
|
324
|
+
<li>Follow us: <a href="http://www.twitter.com/propublica">Twitter</a> | <a href="http://www.facebook.com/propublica">Facebook</a></li>
|
325
|
+
<li><a href="https://twitter.com/ProPublica/propublica-staff">See staff Twitter accounts</a></li>
|
326
|
+
<li><a href="http://www.propublica.org/getinvolved/about">Get Involved @ ProPublica</a></li>
|
327
|
+
<li>Download our apps for:<br> <a href="http://iphone.propublica.org/">iPhone</a> | <a href="https://market.android.com/details?id=com.propublica">Android</a></li>
|
328
|
+
<li><a href="http://www.propublica.org/podcast/">Listen to our Podcast</a></li>
|
329
|
+
</ul>
|
330
|
+
<section class="republish">
|
331
|
+
<a href="http://www.propublica.org/about/steal-our-stories" id="foot-tab-republish">Steal Our Stories</a>
|
332
|
+
<p>Unless otherwise noted, you can republish our stories for free if you <a href="http://www.propublica.org/about/steal-our-stories">follow these rules</a>.</p>
|
333
|
+
<!-- cc_ad -->
|
334
|
+
<div id='google-cc_ad' >
|
335
|
+
<script type='text/javascript'>
|
336
|
+
googletag.display('google-cc_ad');
|
337
|
+
</script>
|
338
|
+
</div>
|
339
|
+
|
340
|
+
</section>
|
341
|
+
</div><!-- /wrapper -->
|
342
|
+
</footer>
|
343
|
+
<footer class="shell wNarrow">
|
344
|
+
<div class="wrapper">
|
345
|
+
<aside class="untoggler">
|
346
|
+
<h1>Safeguard the public interest</h1>
|
347
|
+
<div>
|
348
|
+
<p>Support ProPublica's award-winning investigative journalism.</p>
|
349
|
+
<p class="btn-row"><a href="https://www.propublica.org/mobile/donate" class="btn-ios">Donate</a></p>
|
350
|
+
</div>
|
351
|
+
</aside>
|
352
|
+
<nav>
|
353
|
+
<a href="http://www.propublica.org/about/steal-our-stories">Steal Our Stories <span class="note">follow these rules</span></a>
|
354
|
+
<a href="http://propublica.org/about/legal">Privacy Policy</a>
|
355
|
+
<a href="http://www.propublica.org/forms/newsletter_daily_email">Sign up for our email list</a>
|
356
|
+
<p>© Copyright 2013 Pro Publica Inc.</p>
|
357
|
+
</nav>
|
358
|
+
</div><!-- /wrapper -->
|
359
|
+
</footer>
|
360
|
+
|
361
|
+
|
362
|
+
|
363
|
+
|
364
|
+
<script type="text/javascript" src="//www.propublica.org/js/public/assets/beacons.js"></script>
|
365
|
+
|
366
|
+
|
367
|
+
<script src="http://www.propublica.org/static/fonts/ss-social.js"></script>
|
368
|
+
<script src="http://www.propublica.org/static/fonts/ss-standard.js"></script>
|
369
|
+
|
370
|
+
</body>
|
371
|
+
</html>
|
372
|
+
|
373
|
+
|
374
|
+
</body>
|
375
|
+
</html>
|