pismo 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +19 -28
- data/NOTICE +4 -0
- data/README.markdown +37 -40
- data/Rakefile +3 -2
- data/VERSION +1 -1
- data/bin/pismo +15 -7
- data/lib/pismo/document.rb +2 -2
- data/lib/pismo/internal_attributes.rb +23 -16
- data/lib/pismo/reader.rb +390 -0
- data/lib/pismo.rb +3 -2
- data/pismo.gemspec +23 -15
- data/test/corpus/bbcnews2.html +1575 -0
- data/test/corpus/gmane.html +138 -0
- data/test/corpus/metadata_expected.yaml +20 -5
- data/test/corpus/queness.html +919 -0
- data/test/corpus/reader_expected.yaml +45 -0
- data/test/corpus/tweet.html +360 -0
- data/test/corpus/zefrank.html +535 -0
- data/test/test_corpus.rb +9 -1
- metadata +89 -34
- data/lib/pismo/readability.rb +0 -342
- data/test/test_readability.rb +0 -152
@@ -0,0 +1,535 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
2
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
3
|
+
<head>
|
4
|
+
<meta content="text/html; charset=utf-8" http-equiv="content-type" />
|
5
|
+
<title>Ze Frank on Imaginary Audiences :: Articles :: The 99 Percent</title>
|
6
|
+
<meta name="description" content="We chat with the Internet's most notorious mass-collaboration instigator Ze Frank about idea execution and how to build armies of sportsracers." />
|
7
|
+
<meta name="keywords" content="Ze Frank, The Show, mass collaboration, brainstorming" />
|
8
|
+
<meta name="expires" content="never" />
|
9
|
+
<meta name="language" content="english" />
|
10
|
+
<meta name="distribution" content="global" />
|
11
|
+
<meta name="robots" content="index, follow" />
|
12
|
+
<meta name="revisit-after" content="2 days" />
|
13
|
+
<meta name="author" content="Behance" />
|
14
|
+
<meta name="publisher" content="Behance" />
|
15
|
+
<meta name="copyright" content="2010, Behance LLC" />
|
16
|
+
<link rel="shortcut icon" href="/favicon.ico" />
|
17
|
+
<link rel="alternate" type="application/rss+xml" title="The 99 Percent RSS Feed" href="/feeds/posts" />
|
18
|
+
|
19
|
+
<!--[if IE 6]>
|
20
|
+
<script type="text/javascript" src="http://behance.vo.llnwd.net/e2/99/js/DD_belatedPNG-min.js"></script>
|
21
|
+
<![endif]-->
|
22
|
+
|
23
|
+
<link type="text/css" rel="stylesheet" href="http://behance.vo.llnwd.net/e2/99/min/b=assets/99/css&f=reset.css,base.css,core.css,header.css&cb=19" />
|
24
|
+
<link type="text/css" rel="stylesheet" href="/includes/styles" />
|
25
|
+
<!--[if lt IE 7]>
|
26
|
+
<link rel="stylesheet" href="/assets/99/css/ie6.css" type="text/css" />
|
27
|
+
<![endif]-->
|
28
|
+
<!--[if lt IE 8]>
|
29
|
+
<style type="text/css" media="screen">
|
30
|
+
#site-header { zoom: 1; }
|
31
|
+
#site-search { width: 226px; }
|
32
|
+
</style>
|
33
|
+
<![endif]-->
|
34
|
+
</head>
|
35
|
+
|
36
|
+
<body class="section-articles single-post-page">
|
37
|
+
|
38
|
+
|
39
|
+
<div id="site-container">
|
40
|
+
|
41
|
+
<div id="site-header">
|
42
|
+
<h2 id="site-logo"><a class="pngfix" href="/">99%</a></h2>
|
43
|
+
<div id="site-tagline" class="pngfix">It's not about ideas. It's about making ideas happen.</div>
|
44
|
+
</div>
|
45
|
+
|
46
|
+
<div id="site-navigation">
|
47
|
+
<ul>
|
48
|
+
<li class="nav-home first"><a href="/">Home</a></li>
|
49
|
+
<li class="nav-articles active"><a href="/articles" class="active">Articles</a></li>
|
50
|
+
<li class="nav-tips"><a href="/tips">Tips</a></li>
|
51
|
+
<li class="nav-videos"><a href="/videos">Videos</a></li>
|
52
|
+
<li class="nav-conference"><a href="/conference">Conference</a></li>
|
53
|
+
<li class="nav-sessions"><a href="/sessions">Sessions</a></li>
|
54
|
+
<li class="nav-book last"><a href="/book">Book<span class="icon-new-badge"></span></a></li>
|
55
|
+
</ul>
|
56
|
+
</div>
|
57
|
+
|
58
|
+
<div id="site-search">
|
59
|
+
<form action="/search/results" method="get" accept-charset="utf-8">
|
60
|
+
<div>
|
61
|
+
<label for="search">Search</label>
|
62
|
+
<input type="text" name="search" id="search" class="form-text form-search" />
|
63
|
+
</div>
|
64
|
+
</form>
|
65
|
+
</div>
|
66
|
+
|
67
|
+
|
68
|
+
<div id="site-content">
|
69
|
+
|
70
|
+
<div id="primary-content" class="column column-4 r-gutter">
|
71
|
+
|
72
|
+
<div id="site-messages">
|
73
|
+
</div>
|
74
|
+
|
75
|
+
<div class="topics-column column column-1 r-gutter">
|
76
|
+
|
77
|
+
<h3 id="topics-header" class="section-header">/// Topics</h3>
|
78
|
+
|
79
|
+
|
80
|
+
<div id="categories-section" class="topics-section section clear">
|
81
|
+
<h4 class="topics-title">Category</h4>
|
82
|
+
<ul class="topics-list">
|
83
|
+
<li class=""><a href="/articles/browse/6/accountability">Accountability</a></li>
|
84
|
+
<li class=""><a href="/articles/browse/3/action-method">Action Method</a></li>
|
85
|
+
<li class=""><a href="/articles/browse/8/bias-to-action">Bias-To-Action</a></li>
|
86
|
+
<li class="active"><a href="/articles/browse/10/collaboration">Collaboration</a></li>
|
87
|
+
<li class=""><a href="/articles/browse/11/contrarianism">Contrarianism</a></li>
|
88
|
+
<li class=""><a href="/articles/browse/12/cross-pollination">Cross-pollination</a></li>
|
89
|
+
<li class=""><a href="/articles/browse/13/discipline">Discipline</a></li>
|
90
|
+
<li class="active"><a href="/articles/browse/14/execution">Execution</a></li>
|
91
|
+
<li class=""><a href="/articles/browse/21/feedback">Feedback</a></li>
|
92
|
+
<li class=""><a href="/articles/browse/16/financing">Financing</a></li>
|
93
|
+
<li class=""><a href="/articles/browse/18/leadership">Leadership</a></li>
|
94
|
+
<li class=""><a href="/articles/browse/67/limitations">Limitations</a></li>
|
95
|
+
<li class=""><a href="/articles/browse/1/organization">Organization</a></li>
|
96
|
+
<li class=""><a href="/articles/browse/2/passion">Passion</a></li>
|
97
|
+
<li class=""><a href="/articles/browse/4/perspiration">Perspiration</a></li>
|
98
|
+
<li class=""><a href="/articles/browse/20/prioritization">Prioritization</a></li>
|
99
|
+
<li class=""><a href="/articles/browse/15/professionalism">Professionalism</a></li>
|
100
|
+
<li class=""><a href="/articles/browse/65/prototyping">Prototyping</a></li>
|
101
|
+
<li class=""><a href="/articles/browse/68/recharging">Recharging</a></li>
|
102
|
+
<li class=""><a href="/articles/browse/5/self-marketing">Self-Marketing</a></li>
|
103
|
+
</ul>
|
104
|
+
</div>
|
105
|
+
|
106
|
+
|
107
|
+
<div id="categories-section" class="topics-section section clear">
|
108
|
+
<h4 class="topics-title">Creative Field</h4>
|
109
|
+
<ul class="topics-list">
|
110
|
+
<li class=""><a href="/articles/browse/47/advertising">Advertising</a></li>
|
111
|
+
<li class=""><a href="/articles/browse/69/architecture">Architecture</a></li>
|
112
|
+
<li class=""><a href="/articles/browse/45/art-direction">Art Direction</a></li>
|
113
|
+
<li class=""><a href="/articles/browse/35/consulting">Consulting</a></li>
|
114
|
+
<li class=""><a href="/articles/browse/61/design">Design</a></li>
|
115
|
+
<li class=""><a href="/articles/browse/32/entrepreneurship">Entrepreneurship</a></li>
|
116
|
+
<li class=""><a href="/articles/browse/24/fashion">Fashion</a></li>
|
117
|
+
<li class=""><a href="/articles/browse/27/film-tv">Film & TV</a></li>
|
118
|
+
<li class=""><a href="/articles/browse/25/graphic-design">Graphic Design</a></li>
|
119
|
+
<li class=""><a href="/articles/browse/30/illustration">Illustration</a></li>
|
120
|
+
<li class=""><a href="/articles/browse/38/industrial-design">Industrial Design</a></li>
|
121
|
+
<li class=""><a href="/articles/browse/54/interior-design">Interior Design</a></li>
|
122
|
+
<li class=""><a href="/articles/browse/56/jewelry">Jewelry</a></li>
|
123
|
+
<li class=""><a href="/articles/browse/33/motion-graphics">Motion Graphics</a></li>
|
124
|
+
<li class=""><a href="/articles/browse/26/music">Music</a></li>
|
125
|
+
<li class=""><a href="/articles/browse/34/nonprofit">Nonprofit</a></li>
|
126
|
+
<li class=""><a href="/articles/browse/63/performing-arts">Performing Arts</a></li>
|
127
|
+
<li class=""><a href="/articles/browse/28/photography">Photography</a></li>
|
128
|
+
<li class=""><a href="/articles/browse/40/publishing">Publishing</a></li>
|
129
|
+
<li class=""><a href="/articles/browse/62/retail">Retail</a></li>
|
130
|
+
<li class="active"><a href="/articles/browse/66/social-media">Social Media</a></li>
|
131
|
+
<li class=""><a href="/articles/browse/37/storytelling">Storytelling</a></li>
|
132
|
+
<li class=""><a href="/articles/browse/31/technology">Technology</a></li>
|
133
|
+
<li class=""><a href="/articles/browse/44/visual-arts">Visual Arts</a></li>
|
134
|
+
<li class=""><a href="/articles/browse/36/web-development">Web Development</a></li>
|
135
|
+
<li class=""><a href="/articles/browse/23/writing">Writing</a></li>
|
136
|
+
</ul>
|
137
|
+
</div>
|
138
|
+
|
139
|
+
|
140
|
+
</div>
|
141
|
+
|
142
|
+
<div id="single-post-column" class="articles-column posts-column column column-3">
|
143
|
+
|
144
|
+
<h3 class="section-header">/// Articles</h3>
|
145
|
+
|
146
|
+
<div class="single-post-section">
|
147
|
+
|
148
|
+
<div class="single-article-block single-post-block post-block block">
|
149
|
+
<div class="block-meta">
|
150
|
+
<h1 class="block-title">Ze Frank on Imaginary Audiences</h1>
|
151
|
+
<a id="post-id" rel="5923" class="hide">article</a>
|
152
|
+
<div class="block-author">
|
153
|
+
by <span class="name">Jocelyn K. Glei</span>
|
154
|
+
</div>
|
155
|
+
<div class="block-links">
|
156
|
+
<ul>
|
157
|
+
<li class="link-appreciations"><a class="appreciate appreciate-appendtotal">Appreciate</a></li>
|
158
|
+
<li class="link-tweets"><a class="tweet tweet-appendtotal" title="Ze Frank on Imaginary Audiences">Retweet</a></li>
|
159
|
+
<li id="sharethis" class="link-share"><a>Share This</a></li>
|
160
|
+
</ul>
|
161
|
+
</div>
|
162
|
+
</div>
|
163
|
+
<div class="block-image">
|
164
|
+
<img src="http://behance.vo.llnwd.net/e2/99/img/posts/c3/774f7b39c31c9b034cf12ab442ed5df8.jpg" />
|
165
|
+
</div>
|
166
|
+
<div class="block-description">
|
167
|
+
<span class="intro">If there’s anyone who knows how to marshal an online audience,
|
168
|
+
it’s Ze Frank. Ze is best-known for his 2006 program “The Show,” in
|
169
|
+
which he made a new 2-3 minute video every day for 1 year. Topics
|
170
|
+
ranged from “<a href="http://www.zefrank.com/theshow/archives/2006/08/083006.html">fingers in food</a>” to the
|
171
|
+
mysteries of <a target="_blank" href="http://www.zefrank.com/theshow/archives/2006/05/051006.html">airport signage</a> to a tour
|
172
|
+
de force summary of creatives’ addiction to un-executed ideas, aka
|
173
|
+
<a target="_blank" href="http://www.zefrank.com/theshow/archives/2006/07/071106.html">brain crack</a>. Ze
|
174
|
+
constantly sourced ideas for The Show from his audience, and since then
|
175
|
+
he’s gone on to toy with group collaboration in new ways. <a target="_blank" href="http://www.zefrank.com/from52to48withlove/">52 to 48 with
|
176
|
+
Love</a> called on McCain and
|
177
|
+
Obama voters to exchange messages of reconciliation, while the summer
|
178
|
+
camp-like <a target="_blank" href="http://www.zefrank.com/zesblog/archives/2008/03/colorwar_2008.html">Color Wars</a>
|
179
|
+
harnessed Twitter to bring ad-hoc groups of users together to complete
|
180
|
+
large-scale, creative projects in limited time frames.</span>These days,
|
181
|
+
as ever, Ze is executing lots of ideas and seeing what sticks. A
|
182
|
+
pilot for the Discovery Channel is in the works, “The Show”
|
183
|
+
has been reincarnated (in a slightly more buttoned-down fashion) for
|
184
|
+
<span style="font-style: italic;">TIME</span> magazine, and he’s collaborating with a team of developers on the
|
185
|
+
next iteration of Color Wars. In a wide-ranging conversation that
|
186
|
+
encompassed everything from a discourse on 5th-century theologian St
|
187
|
+
Augustine to a brainstorming session on “zombie golf,” we talked with
|
188
|
+
Ze about how he gets his ideas off the ground and the art of designing
|
189
|
+
for mass participation and collaboration. These are the highlights:<br><br><span style="font-weight: bold;">I’ve
|
190
|
+
heard you speak about “morphological synthesis” as part of your idea
|
191
|
+
development process before. How does that sort of free association
|
192
|
+
function for you in taking the first few steps on a project?</span><br><br>Morphological
|
193
|
+
synthesis is a way of trying to segment your thinking process into
|
194
|
+
parts. I definitely use it quite a bit. (Though not in the strictest
|
195
|
+
sense of the word.) You take 4-5 adjectives or characteristics and then
|
196
|
+
brainstorm in that direction.<br><br>Generally, when I have an idea I start
|
197
|
+
with a sense of scale. Let’s say Procter & Gamble has a new toilet
|
198
|
+
paper. If I’m trying to generate ideas around it, the first thing I’d
|
199
|
+
do is take a general imagination run into scale. What happens if you
|
200
|
+
have no toilet paper? What happens if you have way, way too much toilet
|
201
|
+
paper? What’s the smallest type of toilet paper that you would ever
|
202
|
+
use? What would an incredibly large toilet paper look like? Who is
|
203
|
+
someone that never uses toilet paper? Who is someone that uses it
|
204
|
+
constantly? What can you do with 10,000 rolls of toilet paper? What would a world with no toilet paper
|
205
|
+
look like? <br><br>I flip back and forth between the extremes until
|
206
|
+
something interesting comes out of it. And then you repeat the process
|
207
|
+
based on that new idea. It’s a super-cool exercise only in that it
|
208
|
+
forces you to explore the outside boundaries of things. <br><br><span style="font-weight: bold;">What kind
|
209
|
+
of a role does collaboration – for example, how you interact with your
|
210
|
+
enthusiastic online audience – play in your ability to keep producing
|
211
|
+
work?</span><br><br>I don’t feel like participating and collaboration is
|
212
|
+
fundamental. But I do feel like it’s certainly one of the more exciting
|
213
|
+
parts of the digital age. And I think that there’s a lot of really cool
|
214
|
+
stuff to do in that arena.<br><br>For me, there are two facets: one is
|
215
|
+
creating the work fast, and the other is publishing the work fast.
|
216
|
+
Those are two different things. Publishing it fast, for me, certainly
|
217
|
+
in the early stages from 2000-2006, was super-important, because I was
|
218
|
+
trying to understand the relationship between the genesis idea, the
|
219
|
+
work itself, and the audience – and how those three things interplay
|
220
|
+
with each other. <br><br><span class="blockquote">I say make it as quickly and faithfully as
|
221
|
+
possible. ‘Quickly’ and ‘faithfully’ kind of pull in opposite
|
222
|
+
directions. I find that that’s a nice tension to work in.</span><br><br>What I was finding was that there was an
|
223
|
+
opportunity for me to explore this creative process that normally all
|
224
|
+
happens internally, in a different way where you release work, and then
|
225
|
+
you allow the frameworks that emerge from the way people respond to the
|
226
|
+
work as some kind of feedback cycle. It’s incredible, and it gets
|
227
|
+
talked about a lot now, as people try to open up the work that way,
|
228
|
+
allowing people to react, respond.<br><br>The second thing is making the
|
229
|
+
work fast. I usually try to say ‘make it as quickly and faithfully as
|
230
|
+
possible.’ ‘Quickly’ and ‘faithfully’ kind of pull in opposite
|
231
|
+
directions. I find that that’s a nice tension to work in – try and get
|
232
|
+
it out as fast as possible, but don’t take shortcuts just for time. Try
|
233
|
+
to stay true to the original concept. I find that that is important
|
234
|
+
because you end up making more stuff. And for me, the most difficult
|
235
|
+
part of the process is the first 50%. <br><br><span style="font-weight: bold;">Do you find it harder to complete work when that audience isn’t present? Or is the process just different?</span><br><br>If
|
236
|
+
there is one sea-change that’s happening in the world of creativity, in
|
237
|
+
a broad spectrum kind of way, it is the awareness that more and more
|
238
|
+
people have of the fact that there IS an audience. When people become
|
239
|
+
aware of some sort of sense of audience, it changes the dynamics of
|
240
|
+
making things. What I’m interested in is how your imagination of the
|
241
|
+
potential audience changes your work. <br><br>For me, audience has become
|
242
|
+
what Walt Disney would call that third voice, that critical voice. As
|
243
|
+
I’m working, I’m using my impression of all the different times that
|
244
|
+
I’ve interacted with the audience, all those voices that I’ve heard
|
245
|
+
over time, and they come back into the work. It’s like I’m bouncing the
|
246
|
+
idea around between all of these different factional voice-blocks. The
|
247
|
+
people who think you suck no matter what you do, the people who
|
248
|
+
obviously just want you to feel bad, the people who like what you do
|
249
|
+
regardless, the people who are constantly thinking your work is bigger
|
250
|
+
than it is. So all those kinds of voices become these critical
|
251
|
+
frameworks, or lenses, to look at your work through, even before you
|
252
|
+
publish.<br><br><span class="blockquote">For me, audience has become
|
253
|
+
what Walt Disney would call that third voice, that critical voice.</span><br><br>You look at the random YouTube or you look at the random
|
254
|
+
blogger, and you can almost reverse engineer what that notion of
|
255
|
+
audience was. Some people think the world is populated by people just
|
256
|
+
like them, some people imagine their audience to be far, far greater
|
257
|
+
than it really is, some people seem to be completely unaware.<br><br>It’s
|
258
|
+
almost this pastiche of internal representations of audience that’s
|
259
|
+
creating such a confusing media landscape right now. You look at one
|
260
|
+
single comments train on a political blog and it becomes so jarring.
|
261
|
+
Some people are speaking to the world, some people are speaking to just
|
262
|
+
one other person. It’s almost as if, with each comment, the size of the
|
263
|
+
room changes.<br><br>I long ago stripped away the notion that a creative
|
264
|
+
person’s life is just about satisfying oneself. I don’t think that
|
265
|
+
that’s a reasonable way of going about your work. And I think it’s a
|
266
|
+
very romantic and broken way of thinking about creativity. <br><br>--<br><span style="font-style: italic;">Read
|
267
|
+
thoughts from Imogen Heap, Ben Stiller, Clay Shirky, and more
|
268
|
+
creatives on <a target="_blank" href="http://www.zefrank.com/audience/">their relationship to audience</a>, courtesy of Ze.<br><br>Photo courtesy of <a target="_blank" href="http://laughingsquid.com/" style="font-style: italic;">Scott Beale/Laughing Squid</a>.<br></span> </div>
|
269
|
+
</div>
|
270
|
+
|
271
|
+
</div>
|
272
|
+
|
273
|
+
<div class="form-item action-buttons">
|
274
|
+
<a class="appreciate-button submit-button button disabled-button appreciate-disabled-button appreciate hide appreciate-updatetext">Appreciate</a>
|
275
|
+
<a class="tweet-button submit-button button link-external tweet" title="Ze Frank on Imaginary Audiences">Retweet this article</a>
|
276
|
+
</div>
|
277
|
+
|
278
|
+
<div class="comments">
|
279
|
+
|
280
|
+
<h3 class="comments-header section-header section-subheader-alt">/// Comments
|
281
|
+
<a id="comments" class="comment-add-button highlight-button link-button button">Add a Comment</a>
|
282
|
+
</h3>
|
283
|
+
|
284
|
+
<div class="comment-form alert">
|
285
|
+
<form action="/articles/5923/ze-frank-on-imaginary-audiences#comments" method="post" accept-charset="utf-8">
|
286
|
+
<div class="form-item">
|
287
|
+
<label for="comment">Comment <span class="required">(required)</span></label>
|
288
|
+
<textarea name="comment" id="comment-submit-comment" class="form-text required" rows="8" cols="40"></textarea>
|
289
|
+
</div>
|
290
|
+
<div class="form-item column column-1 r-gutter">
|
291
|
+
<label for="author_name">Your Name <span class="required">(required)</span></label>
|
292
|
+
<input type="text" name="author_name" id="comment-submit-author_name" class="form-text required" value="" />
|
293
|
+
</div>
|
294
|
+
<div class="form-item column column-1 r-gutter">
|
295
|
+
<label for="author_email">Your Email <span class="required">(required)</span></label>
|
296
|
+
<input type="text" name="author_email" id="comment-submit-author_email" class="form-text required" value="" />
|
297
|
+
</div>
|
298
|
+
<div class="form-item column column-1">
|
299
|
+
<label for="author_link">Your Website</label>
|
300
|
+
<input type="text" name="author_link" id="comment-submit-author_link" class="form-text" value="" />
|
301
|
+
</div>
|
302
|
+
<div class="column column-2 r-gutter">
|
303
|
+
<div class="captcha-form">
|
304
|
+
<label for="captcha_input">Enter the characters displayed below <span class="required">(required)</span></label>
|
305
|
+
<img src="/ajax/captcha?cb=1961718107" alt="Captcha" class="captcha-image" />
|
306
|
+
<input type="text" name="captcha_input" id="captcha_input" class="form-text" />
|
307
|
+
<a href="/ajax/captcha?cb=1961718107" class="captcha-reload">Stuck? Try Another...</a>
|
308
|
+
</div> </div>
|
309
|
+
<div class="form-item column">
|
310
|
+
<input type="hidden" name="form_action" value="comment_submit" />
|
311
|
+
<input type="submit" class="submit-button button" value="Submit Comment" />
|
312
|
+
</div>
|
313
|
+
</form>
|
314
|
+
</div>
|
315
|
+
|
316
|
+
|
317
|
+
</div>
|
318
|
+
</div>
|
319
|
+
|
320
|
+
</div>
|
321
|
+
|
322
|
+
|
323
|
+
<div id="site-sidebar" class="column column-1">
|
324
|
+
|
325
|
+
<div class="what-is-block sidebar-item toggle-item toggle-collapsed">
|
326
|
+
<h3 class="what-is-the-99-header section-header">/// What Is The 99%</h3>
|
327
|
+
<div class="what-is-content sidebar-item-content">
|
328
|
+
<p class="quote">
|
329
|
+
<span>“Genius is 1% inspiration and 99% perspiration”</span>
|
330
|
+
<span class="attribution">— Thomas Edison</span>
|
331
|
+
</p>
|
332
|
+
<p class="info">At 99%, <a href="http://www.behance.com" target="_blank" class="link-external">Behance</a>'s think tank, we focus on what happens after inspiration—researching the forces that truly push ideas forward. Our profiles of proven idea makers, action-oriented tips, best-practices sessions, and annual conference are all designed to help you transform ideas from vision to reality.</p>
|
333
|
+
<p class="more"><a href="/about">Learn More</a></p>
|
334
|
+
</div>
|
335
|
+
</div>
|
336
|
+
|
337
|
+
|
338
|
+
<div class="book-block sidebar-item sidebar-item">
|
339
|
+
<h3 class="book-header section-header">/// The 99% Book</h3>
|
340
|
+
<div class="sidebar-item-image">
|
341
|
+
<a href="/book"><img class="pngfix" src="http://behance.vo.llnwd.net/e2/99/img/general/book-sidebar.png" alt="Making Things Happen" /></a>
|
342
|
+
</div>
|
343
|
+
</div>
|
344
|
+
|
345
|
+
|
346
|
+
<div class="newsletter-subscribe-block sidebar-item">
|
347
|
+
<h3 class="newsletter-header section-header">/// The Newsletter</h3>
|
348
|
+
<div class="sidebar-item-content">
|
349
|
+
<p class="info">Sign up for our newsletter and stay updated on the 99%.</p>
|
350
|
+
<div class="newsletter-subscribe-form" >
|
351
|
+
<form action="/articles/5923/ze-frank-on-imaginary-audiences" method="post" accept-charset="utf-8">
|
352
|
+
<div class="form-item">
|
353
|
+
<label for="newsletter_email">Email address</label>
|
354
|
+
<input type="text" name="newsletter_email" id="newsletter-email" class="form-text hint" value="" />
|
355
|
+
</div>
|
356
|
+
<div class="form-item">
|
357
|
+
<input type="hidden" name="form_action" value="newsletter_subscribe" />
|
358
|
+
<input type="submit" class="submit-button button" value="Subscribe" />
|
359
|
+
<img class="spinner" src="http://behance.vo.llnwd.net/e2/99/img/icons/ajax-loader.gif" style="display: none;" />
|
360
|
+
</div>
|
361
|
+
</form>
|
362
|
+
</div>
|
363
|
+
</div>
|
364
|
+
</div>
|
365
|
+
|
366
|
+
<div class="updates-block sidebar-item sidebar-item-last">
|
367
|
+
<h3 class="updates-header section-header">/// Updates</h3>
|
368
|
+
<div class="sidebar-item-content">
|
369
|
+
<p class="info">Need updates constantly? Follow us and get the latest tips instantly!</p>
|
370
|
+
<ul>
|
371
|
+
<li class="first"><a href="http://twitter.com/the99percent"><img src="http://behance.vo.llnwd.net/e2/99/img/general/twitter.png" alt="Follow Us on Twitter" /></a></li>
|
372
|
+
<li class="last"><a href="/feeds/posts" class="rss-feed">RSS Feed</a></li>
|
373
|
+
</ul>
|
374
|
+
</div>
|
375
|
+
</div>
|
376
|
+
|
377
|
+
</div>
|
378
|
+
|
379
|
+
<div id="site-sidebar-secondary" class="column column-1">
|
380
|
+
<div class="related-articles-section related-posts-section">
|
381
|
+
<h3 class="related-posts-header section-header">/// Related Articles</h3>
|
382
|
+
<ul class="related-videos related-posts">
|
383
|
+
<li class="related-post">
|
384
|
+
<div class="block-image">
|
385
|
+
<a href="/articles/5694/blik-making-ideas-stick"><img src="http://behance.vo.llnwd.net/e2/99/img/posts/c1/f378b57eab881bd88af6634e8a1fd91d.jpg" /></a>
|
386
|
+
</div>
|
387
|
+
<h3 class="block-title"><a href="/articles/5694/blik-making-ideas-stick">blik: Making Ideas Stick</a></h3>
|
388
|
+
<div class="block-topics">
|
389
|
+
<span class="block-topics-header">Topics:</span>
|
390
|
+
<ul>
|
391
|
+
<li class="item-first"><a href="/articles/browse/10/collaboration">Collaboration</a>, </li>
|
392
|
+
<li class=""><a href="/articles/browse/14/execution">Execution</a>, </li>
|
393
|
+
<li class=""><a href="/articles/browse/54/interior-design">Interior Design</a>, </li>
|
394
|
+
<li class=""><a href="/articles/browse/4/perspiration">Perspiration</a>, </li>
|
395
|
+
<li class="item-last"><a href="/articles/browse/62/retail">Retail</a></li>
|
396
|
+
</ul>
|
397
|
+
</div>
|
398
|
+
</li>
|
399
|
+
<li class="related-post">
|
400
|
+
<div class="block-image">
|
401
|
+
<a href="/articles/5635/57-even-defining-the-new-guard"><img src="http://behance.vo.llnwd.net/e2/99/img/posts/c1/7358e10623dd18e8b9a44384574ab1ef.jpg" /></a>
|
402
|
+
</div>
|
403
|
+
<h3 class="block-title"><a href="/articles/5635/57-even-defining-the-new-guard">57 Even: Defining the New Guard</a></h3>
|
404
|
+
<div class="block-topics">
|
405
|
+
<span class="block-topics-header">Topics:</span>
|
406
|
+
<ul>
|
407
|
+
<li class="item-first"><a href="/articles/browse/6/accountability">Accountability</a>, </li>
|
408
|
+
<li class=""><a href="/articles/browse/10/collaboration">Collaboration</a>, </li>
|
409
|
+
<li class=""><a href="/articles/browse/14/execution">Execution</a>, </li>
|
410
|
+
<li class="item-last"><a href="/articles/browse/44/visual-arts">Visual Arts</a></li>
|
411
|
+
</ul>
|
412
|
+
</div>
|
413
|
+
</li>
|
414
|
+
<li class="related-post">
|
415
|
+
<div class="block-image">
|
416
|
+
<a href="/articles/6577/apartment-therapy-calm-homes-breed-happy-people"><img src="http://behance.vo.llnwd.net/e2/99/img/posts/c1/9f47b65f98f6f5ab6709e881a62df1db.jpg" /></a>
|
417
|
+
</div>
|
418
|
+
<h3 class="block-title"><a href="/articles/6577/apartment-therapy-calm-homes-breed-happy-people">Apartment Therapy: Calm Homes Breed Happy People</a></h3>
|
419
|
+
<div class="block-topics">
|
420
|
+
<span class="block-topics-header">Topics:</span>
|
421
|
+
<ul>
|
422
|
+
<li class="item-first"><a href="/articles/browse/61/design">Design</a>, </li>
|
423
|
+
<li class=""><a href="/articles/browse/32/entrepreneurship">Entrepreneurship</a>, </li>
|
424
|
+
<li class=""><a href="/articles/browse/14/execution">Execution</a>, </li>
|
425
|
+
<li class=""><a href="/articles/browse/54/interior-design">Interior Design</a>, </li>
|
426
|
+
<li class="item-last"><a href="/articles/browse/2/passion">Passion</a></li>
|
427
|
+
</ul>
|
428
|
+
</div>
|
429
|
+
</li>
|
430
|
+
</ul>
|
431
|
+
<a href="/articles/browse">See all articles</a>
|
432
|
+
</div>
|
433
|
+
</div>
|
434
|
+
|
435
|
+
|
436
|
+
</div>
|
437
|
+
|
438
|
+
<div id="site-footer">
|
439
|
+
<div class="footer-column-1 footer-column column">
|
440
|
+
|
441
|
+
<div class="column column-1 r-mini-gutter">
|
442
|
+
<h3 id="who-we-are-header" class="section-header">/// Who We Are</h3>
|
443
|
+
<div class="who-we-are-section section">
|
444
|
+
<a href="http://www.behance.com" class="behance-logo"><img src="http://behance.vo.llnwd.net/e2/99/img/general/behance-footer-logo.png" alt="Behance" /></a>
|
445
|
+
<p>Based on our research of exceptionally productive people and teams, Behance develops products and services that organize the creative professional community.</p>
|
446
|
+
<p><a href="http://www.behance.com/about">Learn More</a> →</p>
|
447
|
+
</div>
|
448
|
+
</div>
|
449
|
+
|
450
|
+
<div class="products-column column">
|
451
|
+
<h3 id="our-products-header" class="section-header">/// Our Products & Services</h3>
|
452
|
+
<ul class="products-list">
|
453
|
+
<li class="behance-network first"><a href="http://www.behance.net" class="link-external"><span class="title">The Behance Network</span> <span class="description">The leading platform for creative professionals</span></a></li>
|
454
|
+
<li class="creatives-outfitter"><a href="http://www.creativesoutfitter.com" class="link-external"><span class="title">The Creative's Outfitter</span> <span class="description">Notebooks & tools for productivity</span></a></li>
|
455
|
+
<li class="action-method"><a href="http://www.actionmethod.com" class="link-external"><span class="title">The Action Method</span> <span class="description">An intuitive project management application</span></a></li>
|
456
|
+
<li class="served-sites last"><a href="http://www.theserved.com" class="link-external"><span class="title">The Served Sites</span> <span class="description">Fresh work from top creatives</span></a></li>
|
457
|
+
</ul>
|
458
|
+
</div>
|
459
|
+
|
460
|
+
</div>
|
461
|
+
|
462
|
+
<div class="footer-column-2 footer-column column column-1_5">
|
463
|
+
|
464
|
+
<ul class="links">
|
465
|
+
<li class="first"><a href="/about">About</a></li>
|
466
|
+
<li><a href="/advisory">Advisory</a></li>
|
467
|
+
<!-- <li><a href="/masthead">Masthead</a></li> -->
|
468
|
+
<li><a href="http://www.behance.com/legal/privacy">Privacy Policy</a></li>
|
469
|
+
<li class="last"><a href="/contact">Contact</a></li>
|
470
|
+
</ul>
|
471
|
+
|
472
|
+
<p class="copyright">Copyright © 2010 Behance LLC</p>
|
473
|
+
|
474
|
+
</div>
|
475
|
+
|
476
|
+
</div>
|
477
|
+
|
478
|
+
</div>
|
479
|
+
|
480
|
+
|
481
|
+
<script type="text/javascript" src="http://behance.vo.llnwd.net/e2/99/min/b=assets/99/js&f=jquery-1.3.2.min.js,jquery.hint.js,jquery.url.js,jquery.scrollable.js,jquery.equalheights.js,jquery.preloadcssimages.js,utilities.js?cb=19"></script>
|
482
|
+
<script type="text/javascript" src="http://w.sharethis.com/button/sharethis.js#publisher=561f5b06-f4eb-4bc4-9b29-6b2511438220&type=website&buttonText=Share&send_services=email%2Csms%2Caim&post_services=twitter%2Cfacebook%2Cmyspace%2Cdigg%2Cdelicious%2Cstumbleupon%2Creddit%2Clinkedin%2Cfriendfeed%2Cblogger%2Cwordpress%2Ctypepad%2Cgoogle_bmarks%2Cyahoo_bmarks%2Cwindows_live%2Ctechnorati%2Cnewsvine%2Cfurl%2Cfark%2Ccurrent%2Clivejournal&linkfg=%23e91c6b&button=false&offsetLeft=-296"></script>
|
483
|
+
<script>
|
484
|
+
if ( $('#sharethis').length ) {
|
485
|
+
var shareThis = SHARETHIS.addEntry({ title: 'Share', summary: 'Share this 99% post.'}, { button: false });
|
486
|
+
shareThis.attachButton( $('#sharethis')[0] );
|
487
|
+
}
|
488
|
+
</script>
|
489
|
+
<script type="text/javascript">
|
490
|
+
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
491
|
+
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
492
|
+
</script>
|
493
|
+
<script type="text/javascript">
|
494
|
+
try {
|
495
|
+
var pageTracker = _gat._getTracker("UA-1309100-18");
|
496
|
+
pageTracker._trackPageview();
|
497
|
+
} catch(err) {}
|
498
|
+
</script>
|
499
|
+
|
500
|
+
|
501
|
+
|
502
|
+
<script type="text/javascript">
|
503
|
+
_qoptions={
|
504
|
+
qacct:"p-63LGhSQQNmjVo"
|
505
|
+
};
|
506
|
+
</script>
|
507
|
+
<script type="text/javascript" src="http://edge.quantserve.com/quant.js"></script>
|
508
|
+
<noscript>
|
509
|
+
<img src="http://pixel.quantserve.com/pixel/p-63LGhSQQNmjVo.gif" style="display: none;" border="0" height="1" width="1" alt="Quantcast"/>
|
510
|
+
</noscript>
|
511
|
+
|
512
|
+
|
513
|
+
|
514
|
+
<script>
|
515
|
+
document.write(unescape("%3Cscript src='" + (document.location.protocol == "https:" ? "https://sb" : "http://b") + ".scorecardresearch.com/beacon.js' %3E%3C/script%3E"));
|
516
|
+
</script>
|
517
|
+
|
518
|
+
<script>
|
519
|
+
COMSCORE.beacon({
|
520
|
+
c1:2,
|
521
|
+
c2:6508569,
|
522
|
+
c3:"",
|
523
|
+
c4:"",
|
524
|
+
c5:"",
|
525
|
+
c6:"",
|
526
|
+
c15:""
|
527
|
+
});
|
528
|
+
</script>
|
529
|
+
<noscript>
|
530
|
+
<img src="http://b.scorecardresearch.com/b?c1=2&c2=6508569&c3=&c4=&c5=&c6=&c15=&cv=1.3&cj=1" style="display:none" width="0" height="0" alt="" />
|
531
|
+
</noscript>
|
532
|
+
|
533
|
+
|
534
|
+
</body>
|
535
|
+
</html>
|
data/test/test_corpus.rb
CHANGED
@@ -9,7 +9,8 @@ class TestCorpus < Test::Unit::TestCase
|
|
9
9
|
Dir[HTML_DIRECTORY + "/*.html"].each { |filename| @corpus[File.basename(filename).sub(/\.html$/, '').to_sym] = File.read(filename) }
|
10
10
|
|
11
11
|
# Load the "expected metadata" ready for tests
|
12
|
-
@metadata = YAML.load(open(HTML_DIRECTORY + "/metadata_expected.yaml"))
|
12
|
+
@metadata = YAML.load(open(HTML_DIRECTORY + "/metadata_expected.yaml"))
|
13
|
+
@reader_metadata = YAML.load(open(HTML_DIRECTORY + "/reader_expected.yaml"))
|
13
14
|
end
|
14
15
|
|
15
16
|
should "pass basic sanitization and result in Nokogiri documents" do
|
@@ -29,5 +30,12 @@ class TestCorpus < Test::Unit::TestCase
|
|
29
30
|
end
|
30
31
|
end
|
31
32
|
end
|
33
|
+
|
34
|
+
should "pass content extraction tests" do
|
35
|
+
@reader_metadata.each do |file, expected|
|
36
|
+
@doc = Reader::Document.new(@corpus[file])
|
37
|
+
assert_equal expected, @doc.sentences(2)
|
38
|
+
end
|
39
|
+
end
|
32
40
|
end
|
33
41
|
end
|