xp 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +16 -0
- data/LICENSE.txt +22 -0
- data/README.markdown +38 -0
- data/Rakefile +1 -0
- data/bin/xp +13 -12
- data/lib/user_agents.rb +10 -0
- data/lib/xp.rb +55 -0
- data/pkg/xp-0.0.1.gem +0 -0
- data/pkg/xp-0.0.2.gem +0 -0
- data/pkg/xp-1.0.0.gem +0 -0
- data/research/attribute_extractor.rb +7 -0
- data/research/bb_parser.rb +8 -0
- data/research/interface.rb +7 -0
- data/research/load.rb +1 -0
- data/research/selector.rb +10 -0
- data/research/y.htm +4452 -0
- data/test/test_content.html +122 -0
- data/test/test_helper.rb +6 -0
- data/test/xp_test.rb +55 -0
- data/xp.gemspec +17 -0
- metadata +33 -9
@@ -0,0 +1,122 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
2
|
+
<html class="evernote_clearly__before_visible evernote_clearly__is_visible"><head>
|
3
|
+
<meta http-equiv="content-type" content="text/html; charset=windows-1252"><script src="test_content_files/fa27bb6ce937aea400cc8e5f11aa42d5.js" async="" type="text/javascript"></script><script type="text/javascript">
|
4
|
+
<!--
|
5
|
+
(new Image).src="http://store.yahoo.net/cgi-bin/refsd?e=http://paulgraham.com/know.html&h=paulgraham.com&v=1.0&dr=" + escape(document.referrer);
|
6
|
+
-->
|
7
|
+
</script>
|
8
|
+
<title>How You Know</title><!-- <META NAME="ROBOTS" CONTENT="NOODP"> -->
|
9
|
+
<link rel="shortcut icon" href="http://ycombinator.com/arc/arc.png">
|
10
|
+
<style type="text/css"></style></head><body class="evernote_clearly__before_visible evernote_clearly__is_visible" bgcolor="ffffff" background="test_content_files/paulgraham_2271_0.gif" text="000000" vlink="464646" link="000099"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr valign="top"><td><map name="10fe3548f29bf95"><area shape="rect" coords="0,0,67,21" href="http://paulgraham.com/index.html"><area shape="rect" coords="0,21,67,42" href="http://paulgraham.com/articles.html"><area shape="rect" coords="0,42,67,63" href="http://www.amazon.com/gp/product/0596006624"><area shape="rect" coords="0,63,67,84" href="http://paulgraham.com/books.html"><area shape="rect" coords="0,84,67,105" href="http://ycombinator.com/"><area shape="rect" coords="0,105,67,126" href="http://startupschool.org/"><area shape="rect" coords="0,126,67,147" href="http://paulgraham.com/arc.html"><area shape="rect" coords="0,147,67,168" href="http://paulgraham.com/lisp.html"><area shape="rect" coords="0,168,67,189" href="http://paulgraham.com/antispam.html"><area shape="rect" coords="0,189,67,210" href="http://paulgraham.com/kedrosky.html"><area shape="rect" coords="0,210,67,231" href="http://paulgraham.com/faq.html"><area shape="rect" coords="0,231,67,252" href="http://paulgraham.com/raq.html"><area shape="rect" coords="0,252,67,273" href="http://paulgraham.com/quo.html"><area shape="rect" coords="0,273,67,294" href="http://paulgraham.com/rss.html"><area shape="rect" coords="0,294,67,315" href="http://paulgraham.com/bio.html"><area shape="rect" coords="0,315,67,336" href="https://twitter.com/paulg"><area shape="rect" coords="0,336,67,357" href="http://paulgraham.com/nsearch.html"><area shape="rect" coords="0,357,67,378" href="http://paulgraham.com/ind.html"></map><img src="test_content_files/essays-1.gif" usemap="#10fe3548f29bf95" ismap="ismap" border="0" height="378" hspace="0" vspace="0" width="69"></td><td><img src="test_content_files/trans_1x1.gif" border="0" height="1" width="26"></td><td><a href="http://paulgraham.com/index.html"><img src="test_content_files/paulgraham_2271_3232.gif" border="0" height="45" hspace="0" vspace="0" width="410"></a><br><br><table border="0" cellpadding="0" cellspacing="0" width="435"><tbody><tr valign="top"><td width="435"><img src="test_content_files/how-you-know-1.gif" alt="How You Know" border="0" height="18" hspace="0" vspace="0" width="120"><br><br><font face="verdana" size="2">December 2014<br><br>I've read Villehardouin's chronicle of the Fourth Crusade at least
|
11
|
+
two times, maybe three. And yet if I had to write down everything
|
12
|
+
I remember from it, I doubt it would amount to much more than a
|
13
|
+
page. Multiply this times several hundred, and I get an uneasy
|
14
|
+
feeling when I look at my bookshelves. What use is it to read all
|
15
|
+
these books if I remember so little from them?<br><br>A few months ago, as I was reading Constance Reid's excellent
|
16
|
+
biography of Hilbert, I figured out if not the answer to this
|
17
|
+
question, at least something that made me feel better about it.
|
18
|
+
She writes:
|
19
|
+
<blockquote>
|
20
|
+
Hilbert had no patience with mathematical lectures which filled
|
21
|
+
the students with facts but did not teach them how to frame a
|
22
|
+
problem and solve it. He often used to tell them that "a perfect
|
23
|
+
formulation of a problem is already half its solution."
|
24
|
+
</blockquote>
|
25
|
+
That has always seemed to me an important point, and I was even
|
26
|
+
more convinced of it after hearing it confirmed by Hilbert.<br><br>But how had I come to believe in this idea in the first place? A
|
27
|
+
combination of my own experience and other things I'd read. None
|
28
|
+
of which I could at that moment remember! And eventually I'd forget
|
29
|
+
that Hilbert had confirmed it too. But my increased belief in the
|
30
|
+
importance of this idea would remain something I'd learned from
|
31
|
+
this book, even after I'd forgotten I'd learned it.<br><br>Reading and experience train your model of the world. And even if
|
32
|
+
you forget the experience or what you read, its effect on your model
|
33
|
+
of the world persists. Your mind is like a compiled program you've
|
34
|
+
lost the source of. It works, but you don't know why.<br><br>The place to look for what I learned from Villehardouin's chronicle
|
35
|
+
is not what I remember from it, but my mental models of the crusades,
|
36
|
+
Venice, medieval culture, siege warfare, and so on. Which doesn't
|
37
|
+
mean I couldn't have read more attentively, but at least the harvest
|
38
|
+
of reading is not so miserably small as it might seem.<br><br>This is one of those things that seem obvious in retrospect. But
|
39
|
+
it was a surprise to me and presumably would be to anyone else who
|
40
|
+
felt uneasy about (apparently) forgetting so much they'd read.<br><br>Realizing it does more than make you feel a little better about
|
41
|
+
forgetting, though. There are specific implications.<br><br>For example, reading and experience are usually "compiled" at the
|
42
|
+
time they happen, using the state of your brain at that time. The
|
43
|
+
same book would get compiled differently at different points in
|
44
|
+
your life. Which means it is very much worth reading important
|
45
|
+
books multiple times. I always used to feel some misgivings about
|
46
|
+
rereading books. I unconsciously lumped reading together with work
|
47
|
+
like carpentry, where having to do something again is a sign you
|
48
|
+
did it wrong the first time. Whereas now the phrase "already read"
|
49
|
+
seems almost ill-formed.<br><br>Intriguingly, this implication isn't limited to books. Technology
|
50
|
+
will increasingly make it possible to relive our experiences. When
|
51
|
+
people do that today it's usually to enjoy them again (e.g. when
|
52
|
+
looking at pictures of a trip) or to find the origin of some bug in
|
53
|
+
their compiled code (e.g. when Stephen Fry succeeded in remembering
|
54
|
+
the childhood trauma that prevented him from singing). But as
|
55
|
+
technologies for recording and playing back your life improve, it
|
56
|
+
may become common for people to relive experiences without any goal
|
57
|
+
in mind, simply to learn from them again as one might when rereading
|
58
|
+
a book.<br><br>Eventually we may be able not just to play back experiences but
|
59
|
+
also to index and even edit them. So although not knowing how you
|
60
|
+
know things may seem part of being human, it may not be.<br><br><br><br><br><br><br><br>
|
61
|
+
<b>Thanks</b> to Sam Altman, Jessica Livingston, and Robert Morris for reading
|
62
|
+
drafts of this.<br><br></font></td></tr></tbody></table><table border="0" cellpadding="0" cellspacing="0" width="435"><tbody><tr><td><font face="verdana" size="2"><br><br><hr></font></td></tr></tbody></table></td></tr></tbody></table>
|
63
|
+
<script type="text/javascript">
|
64
|
+
csell_env = 'bf1';
|
65
|
+
</script>
|
66
|
+
<script type="text/javascript">
|
67
|
+
// Begin Yahoo Store Generated Code
|
68
|
+
</script> <script type="text/javascript" src="test_content_files/ylc_1.js"></script> <script type="text/javascript" src="test_content_files/beacon-1.js">
|
69
|
+
</script>
|
70
|
+
<script type="text/javascript">
|
71
|
+
// Begin Yahoo Store Generated Code
|
72
|
+
csell_page_data = {}; csell_page_rec_data = []; ts='TOK_STORE_ID';
|
73
|
+
</script>
|
74
|
+
<script type="text/javascript">
|
75
|
+
// Begin Yahoo Store Generated Code
|
76
|
+
function csell_GLOBAL_INIT_TAG() { var csell_token_map = {}; csell_token_map['TOK_ITEM_ID_LIST'] = 'know'; csell_token_map['TOK_BEACON_TYPE'] = 'prod'; csell_token_map['TOK_RAND_KEY'] = 't'; csell_token_map['TOK_SPACEID'] = '2022276099'; csell_token_map['TOK_IS_ORDERABLE'] = '2'; csell_token_map['TOK_STORE_ID'] = 'paulgraham'; csell_token_map['TOK_URL'] = 'http://geo.yahoo.com'; csell_token_map['TOK_ORDER_HOST'] = 'order.store.yahoo.net'; c = csell_page_data; var t = csell_token_map; c['s'] = t['TOK_SPACEID']; c['url'] = t['TOK_URL']; c['si'] = t[ts]; c['ii'] = t['TOK_ITEM_ID_LIST']; c['bt'] = t['TOK_BEACON_TYPE']; c['rnd'] = t['TOK_RAND_KEY']; c['io'] = t['TOK_IS_ORDERABLE']; YStore.addItemUrl = 'http%s://'+t['TOK_ORDER_HOST']+'/'+t[ts]+'/ymix/MetaController.html?eventName.addEvent&cartDS.shoppingcart_ROW0_m_orderItemVector_ROW0_m_itemId=%s&cartDS.shoppingcart_ROW0_m_orderItemVector_ROW0_m_quantity=1&ysco_key_cs_item=1§ionId=ysco.cart&ysco_key_store_id='+t[ts]; }
|
77
|
+
</script>
|
78
|
+
<script type="text/javascript">
|
79
|
+
// Begin Yahoo Store Generated Code
|
80
|
+
function csell_REC_VIEW_TAG() { var env = (typeof csell_env == 'string')?csell_env:'prod'; var p = csell_page_data; var a = '/sid='+p['si']+'/io='+p['io']+'/ii='+p['ii']+'/bt='+p['bt']+'-view'+'/en='+env; var r=Math.random(); YStore.CrossSellBeacon.renderBeaconWithRecData(p['url']+'/p/s='+p['s']+'/'+p['rnd']+'='+r+a); }
|
81
|
+
</script>
|
82
|
+
<script type="text/javascript">
|
83
|
+
// Begin Yahoo Store Generated Code
|
84
|
+
var csell_token_map = {}; csell_token_map['TOK_PAGE'] = 'p'; csell_token_map['TOK_WS_URL'] = 'http://paulgraham.csell.store.yahoo.net/cs/recommend?itemids=know&location=p'; csell_token_map['TOK_SHOW_CS_RECS'] = 'false'; csell_token_map['TOK_CURR_SYM'] = '$'; var t = csell_token_map; csell_GLOBAL_INIT_TAG(); YStore.page = t['TOK_PAGE']; YStore.currencySymbol = t['TOK_CURR_SYM']; YStore.crossSellUrl = t['TOK_WS_URL']; YStore.showCSRecs = t['TOK_SHOW_CS_RECS']; </script> <script type="text/javascript" src="test_content_files/recs-1.js"></script> <script type="text/javascript">
|
85
|
+
</script>
|
86
|
+
<script type="text/javascript">
|
87
|
+
<!--
|
88
|
+
if (document.referrer && (document.referrer!='') && window.Image)
|
89
|
+
{
|
90
|
+
(new Image).src="http://redirect1.vip.store.yahoo.com/cgi-bin/referadd?spcl=1&et=54903f22&catalog=paulgraham&r=" + escape(document.referrer) + "&v=1"; }
|
91
|
+
// -->
|
92
|
+
</script><script type="text/javascript">
|
93
|
+
<!--
|
94
|
+
if (document.referrer && (document.referrer!='') && window.Image)
|
95
|
+
{
|
96
|
+
(new Image).src="http://redirect1.vip.store.yahoo.net/cgi-bin/referadd?spcl=1&et=54903f22&catalog=paulgraham&r=" + escape(document.referrer) + "&v=3"; }
|
97
|
+
// -->
|
98
|
+
</script><script type="text/javascript">(function (d, w) { var f = function () { var x = d.getElementsByTagName('SCRIPT')[0]; var s = d.createElement('SCRIPT'); s.type = 'text/javascript'; s.async = true; s.src = '//np.lexity.com/embed/YA/fa27bb6ce937aea400cc8e5f11aa42d5?id=06de067e4a3b'; x.parentNode.insertBefore(s, x); }; w.attachEvent ? w.attachEvent('onload',f) : w.addEventListener('load',f,false); }(document, window)); </script><div id="evernote_clearly__container"><style type="text/css" id="evernote_clearly__css_for_container">#evernote_clearly__container { position: absolute !important; width: 5px !important; height: 5px !important; top: -1000px !important; left: -1000px !important; margin: 0 !important; padding: 0 !important; border: none !important; }
|
99
|
+
#evernote_clearly__definitions { display: none !important; }
|
100
|
+
</style><div id="evernote_clearly__definitions"><div id="evernote_clearly__serialized__option__text_font">"PT Serif"</div><div id="evernote_clearly__serialized__option__text_font_header">"PT Serif"</div><div id="evernote_clearly__serialized__option__text_font_monospace">Inconsolata</div><div id="evernote_clearly__serialized__option__text_size">20px</div><div id="evernote_clearly__serialized__option__text_line_height">1.5em</div><div id="evernote_clearly__serialized__option__box_width">36em</div><div id="evernote_clearly__serialized__option__color_background">#f3f2ee</div><div id="evernote_clearly__serialized__option__color_text">#1f0909</div><div id="evernote_clearly__serialized__option__color_links">#065588</div><div id="evernote_clearly__serialized__option__text_align">normal</div><div id="evernote_clearly__serialized__option__base">theme-1</div><div id="evernote_clearly__serialized__option__footnote_links">on_print</div><div id="evernote_clearly__serialized__option__large_graphics">do_nothing</div><div id="evernote_clearly__serialized__option__custom_css">none</div><div id="evernote_clearly__serialized__var__theme">theme-1</div><div id="evernote_clearly__serialized__var__keys_activation">Control + Command + Right Arrow</div><div id="evernote_clearly__serialized__var__keys_clip">Control + Command + Up Arrow</div><div id="evernote_clearly__serialized__var__keys_highlight">Control + Command + H</div><div id="evernote_clearly__serialized__var__keys_speech">Control + Command + S</div><div id="evernote_clearly__serialized__var__clip_tag">none</div><div id="evernote_clearly__serialized__var__clip_notebook">none</div><div id="evernote_clearly__serialized__var__clip_notebook_guid">none</div><div id="evernote_clearly__serialized__var__related_notes">enabled</div><div id="evernote_clearly__serialized__var__smart_filing">enabled</div><div id="evernote_clearly__serialized__var__smart_filing_for_business">disabled</div><div id="evernote_clearly__serialized__var__speech_speed">normal</div><div id="evernote_clearly__serialized__var__speech_gender">default</div><div id="evernote_clearly__serialized__var__open_notes_in">web</div><div id="evernote_clearly__serialized__var__custom_theme_options">none</div><div id="evernote_clearly__serialized__translation__menu__close__tooltip">Hide the overlay.</div><div id="evernote_clearly__serialized__translation__menu__clip_to_evernote__tooltip">Clip to Evernote.</div><div id="evernote_clearly__serialized__translation__menu__highlight_to_evernote__tooltip">Highlight.</div><div id="evernote_clearly__serialized__translation__menu__print__tooltip">Print.</div><div id="evernote_clearly__serialized__translation__menu__settings__tooltip">Show Themes.</div><div id="evernote_clearly__serialized__translation__menu__fitts__tooltip">Hide the overlay.</div><div id="evernote_clearly__serialized__translation__menu__speak__tooltip">Text To Speech</div><div id="evernote_clearly__serialized__translation__menu__speak__play__tooltip">Play</div><div id="evernote_clearly__serialized__translation__menu__speak__pause__tooltip">Pause</div><div id="evernote_clearly__serialized__translation__menu__speak__forward__tooltip">Go Forwards</div><div id="evernote_clearly__serialized__translation__menu__speak__rewind__tooltip">Go Backwards</div><div id="evernote_clearly__serialized__translation__rtl__main__label">Text direction?</div><div id="evernote_clearly__serialized__translation__rtl__ltr__label">Left-to-right</div><div id="evernote_clearly__serialized__translation__rtl__rtl__label">Right-to-left</div><div id="evernote_clearly__serialized__translation__blank_error__heading">Tips for using Evernote Clearly</div><div id="evernote_clearly__serialized__translation__blank_error__body">Clearly
|
101
|
+
is currently designed to work on article pages. An article page is any
|
102
|
+
page that contains one large block of text -- like, for example, a
|
103
|
+
newspaper article or blog post.</div><div id="evernote_clearly__serialized__translation__related_notes__title">Related Notes</div><div id="evernote_clearly__serialized__translation__related_notes__disable_short">Disable?</div><div id="evernote_clearly__serialized__translation__related_notes__disable_long">Do you want to disable Related Notes?</div><div id="evernote_clearly__serialized__translation__filing_info__title_normal">Filed in:</div><div id="evernote_clearly__serialized__translation__filing_info__title_smart">Smart Filed in:</div><div id="evernote_clearly__serialized__translation__filing_info__default_notebook">Your default Notebook</div><div id="evernote_clearly__serialized__translation__filing_info__view">View</div><div id="evernote_clearly__serialized__translation__filing_info__edit">Edit</div><div id="evernote_clearly__serialized__translation__filing_info__sentence">Clipped into the [=notebook] notebook, and tagged with [=tags].</div><div id="evernote_clearly__serialized__translation__filing_info__sentence_no_tags">Clipped into the [=notebook] notebook.</div><div id="evernote_clearly__serialized__translation__filing_info__sentence_and">and</div><div id="evernote_clearly__serialized__translation__filing_info__sentence_other_tags">other tags</div><div id="evernote_clearly__serialized__translation__evernote_clipping">Clipping...</div><div id="evernote_clearly__serialized__translation__evernote_clipping_failed">Clipping failed.</div><div id="evernote_clearly__serialized__translation__evernote_login__request">To sign in to Evernote, please click the Clearly icon in Chrome's toolbar.</div><div id="evernote_clearly__serialized__translation__evernote_login__heading">Sign in to Evernote</div><div id="evernote_clearly__serialized__translation__evernote_login__spinner">Signing in to Evernote</div><div id="evernote_clearly__serialized__translation__evernote_login__create_account">Create an account</div><div id="evernote_clearly__serialized__translation__evernote_login__button_do__label">Sign in</div><div id="evernote_clearly__serialized__translation__evernote_login__button_cancel__label">Cancel</div><div id="evernote_clearly__serialized__translation__evernote_login__username__label">Evernote Username or Email Address</div><div id="evernote_clearly__serialized__translation__evernote_login__password__label">Password</div><div id="evernote_clearly__serialized__translation__evernote_login__rememberMe__label">Remember me</div><div id="evernote_clearly__serialized__translation__evernote_login__username__error__required">Username is required.</div><div id="evernote_clearly__serialized__translation__evernote_login__username__error__length">Username must be between 1 and 64 characters long.</div><div id="evernote_clearly__serialized__translation__evernote_login__username__error__format">Username contains bad characters.</div><div id="evernote_clearly__serialized__translation__evernote_login__username__error__invalid">Not a valid, active user.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__required">Password is required.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__length">Password must be between 6 and 64 characters long.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__format">Password contains bad characters.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__invalid">Username and password do not match existing user.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__timeout">Login session timed-out. Please try again.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__reset">Your password has expired. Please reset it now.</div><div id="evernote_clearly__serialized__translation__evernote_login__general__error">Authentication failed.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__message__sms">We sent a text message with a verification code to</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__message__google">Enter the verification code displayed in your Google Authenticator app.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__label">Six-digit code</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__error__required">Verification code is required.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__error__length">Verification code should be at least 6 characters long.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__error__format">Verification code should be only numbers.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__error__invalid">Verification code is incorrect.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__button_do__label">Continue</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__button_help__label">I need help getting a verification code</div><div id="evernote_clearly__serialized__translation__settings__theme__1__not_translated">Newsprint</div><div id="evernote_clearly__serialized__translation__settings__theme__2__not_translated">Notable</div><div id="evernote_clearly__serialized__translation__settings__theme__3__not_translated">Night Owl</div><div id="evernote_clearly__serialized__translation__settings__theme__1">Newsprint</div><div id="evernote_clearly__serialized__translation__settings__theme__2">Notable</div><div id="evernote_clearly__serialized__translation__settings__theme__3">Night Owl</div><div id="evernote_clearly__serialized__translation__settings__theme__custom">Custom</div><div id="evernote_clearly__serialized__translation__settings__fontSize__small">small</div><div id="evernote_clearly__serialized__translation__settings__fontSize__medium">medium</div><div id="evernote_clearly__serialized__translation__settings__fontSize__large">large</div><div id="evernote_clearly__serialized__translation__features__title__new">You have a new version of Evernote Clearly</div><div id="evernote_clearly__serialized__translation__features__title__all">Welcome to the new Evernote Clearly</div><div id="evernote_clearly__serialized__translation__features__speech__title">Text To Speech</div><div id="evernote_clearly__serialized__translation__features__speech__text">Sit
|
104
|
+
back and let Clearly read blog posts, articles, and web pages to you
|
105
|
+
thanks to the new Text To Speech feature, available exclusively for
|
106
|
+
Evernote Premium subscribers.</div><div id="evernote_clearly__serialized__translation__features__speech__text__powered">Evernote Clearly is powered by [=service].</div><div id="evernote_clearly__serialized__translation__features__speech__text__requires">Requires [=product].</div><div id="evernote_clearly__serialized__translation__features__speech__text__available">Text To Speech in 21 languages:</div><div id="evernote_clearly__serialized__translation__features__speech__text__available_languages">English,
|
107
|
+
Japanese, Spanish, French, German, Chinese, Korean, Arabic, Czech,
|
108
|
+
Danish, Dutch, Finnish, Greek, Hungarian, Italian, Norwegian, Polish,
|
109
|
+
Portuguese, Russian, Swedish and Turkish.</div><div id="evernote_clearly__serialized__translation__features__speech__text__try">Try Text To Speech</div><div id="evernote_clearly__serialized__translation__features__speech__text__upgrade">Upgrade to Evernote Premium</div><div id="evernote_clearly__serialized__translation__features__speech__text__language">Language not supported</div><div id="evernote_clearly__serialized__translation__features__speech__text__play">Play using this language</div><div id="evernote_clearly__serialized__translation__features__speech__text__cancel">Cancel</div><div id="evernote_clearly__serialized__translation__features__speech__no_language_title">Language not supported</div><div id="evernote_clearly__serialized__translation__features__speech__no_language_explanation">Evernote
|
110
|
+
Clearly was not able to determine the language of this article. If you
|
111
|
+
recognize the language, select it below and we'll play it.</div><div id="evernote_clearly__serialized__translation__features__clipping__title">Clip to Evernote</div><div id="evernote_clearly__serialized__translation__features__clipping__text">Save what you're reading to your Evernote account with one click. Access clips from any device, anytime in Evernote.</div><div id="evernote_clearly__serialized__translation__features__highlighting__title">Highlighting</div><div id="evernote_clearly__serialized__translation__features__highlighting__text">Highlight
|
112
|
+
text you want to remember & quickly find it in your Evernote
|
113
|
+
account. Highlighting changes you make in Clearly will be updated in
|
114
|
+
your Evernote account automatically.</div><div id="evernote_clearly__serialized__translation__features__related_notes__title">Related Notes</div><div id="evernote_clearly__serialized__translation__features__related_notes__text">Magically
|
115
|
+
rediscover notes from your Evernote account that are related to the
|
116
|
+
page you are viewing. Related Notes are displayed at the bottom of the
|
117
|
+
article or on the right side if space permits.</div><div id="evernote_clearly__serialized__translation__features__smart_filing__title">Smart Filing</div><div id="evernote_clearly__serialized__translation__features__smart_filing__text">Automatically assign tags to your Web clips and saves them to the appropriate notebook, so you don't have to.</div><div id="evernote_clearly__serialized__translation__features__eula_notice">By using Clearly, you agree to our [=eula].</div><div id="evernote_clearly__serialized__translation__features__close2">Close</div><div id="evernote_clearly__serialized__translation__misc__page">page</div></div><script class="evernote_clearly__launcher" src="test_content_files/launch.js"></script><script id="evernote_clearly__init" src="test_content_files/init.js"></script><iframe scrolling="auto" allowtransparency="true" id="evernote_clearly__controller" frameborder="0"></iframe><style type="text/css" id="evernote_clearly__css_for_reformat">html.evernote_clearly__before_visible, html > body.evernote_clearly__before_visible, body.evernote_clearly__before_visible { position: static !important; margin: 0 !important; padding: 0 !important; border: 0 !important; }
|
118
|
+
html.evernote_clearly__is_visible, html > body.evernote_clearly__is_visible, body.evernote_clearly__is_visible { overflow: hidden !important; overflow-x: hidden !important; overflow-y: hidden !important; }
|
119
|
+
html.evernote_clearly__before_visible object, html.evernote_clearly__before_visible embed, html.evernote_clearly__before_visible iframe, html > body.evernote_clearly__before_visible object, html > body.evernote_clearly__before_visible embed, html > body.evernote_clearly__before_visible iframe, body.evernote_clearly__before_visible object, body.evernote_clearly__before_visible embed, body.evernote_clearly__before_visible iframe { visibility: hidden !important; }
|
120
|
+
#evernote_clearly__reformat { margin: 0 !important; padding: 0 !important; border: none !important; position: absolute !important; width: 100% !important; height: 100% !important; min-height: 100% !important; top: -100%; left: -100%; z-index: 2147483647 !important; }
|
121
|
+
html.evernote_clearly__before_visible #evernote_clearly__reformat, html > body.evernote_clearly__before_visible #evernote_clearly__reformat, body.evernote_clearly__before_visible #evernote_clearly__reformat, #evernote_clearly__reformat { display: block !important; overflow: auto !important; visibility: visible !important; }
|
122
|
+
</style></div><iframe style="top: 0px; left: 0px;" scrolling="auto" allowtransparency="true" id="evernote_clearly__reformat" frameborder="0"></iframe><style type="text/css" id="clearly_next_pages_container__css">#clearly_next_pages_container { margin: 0; padding: 0; border: none; position: absolute; width: 10px; height: 10px; top: -100px; left: -100px; } #clearly_next_pages_container iframe { margin: 0; padding: 0; border: none; position: absolute; width: 10px; height: 10px; top: -100px; left: -100px; } </style><div id="clearly_next_pages_container"></div></body></html>
|
data/test/test_helper.rb
ADDED
data/test/xp_test.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require './lib/xp'
|
2
|
+
|
3
|
+
HTML = File.open('./test/test_content.html').read
|
4
|
+
XPATH_QUERY = '//title'
|
5
|
+
XPATH_RESULT = "<title>How You Know</title>\n"
|
6
|
+
|
7
|
+
CSS_QUERY = 'title'
|
8
|
+
CSS_RESULT = "<title>How You Know</title>\n"
|
9
|
+
|
10
|
+
LYNX = 'http://www.delorie.com/web/lynxview.html'
|
11
|
+
LYNX_TAGLINE = 'Lynx Viewer'
|
12
|
+
SIMPLE_PAGE_URL = "http://w3m.sourceforge.net/"
|
13
|
+
TEST_URL = 'http://qucentis.com/unavailable_404_link'
|
14
|
+
TEST_FILE_URL = 'http://abc.com/file/hello.mov?key=123&id=569'
|
15
|
+
TEST_FILE_BASENAME = 'hello'
|
16
|
+
TEST_FILE_EXTENSION = '.mov'
|
17
|
+
|
18
|
+
class TestXP < Minitest::Test
|
19
|
+
String.send(:include, XP)
|
20
|
+
|
21
|
+
def setup
|
22
|
+
@html = File.open('./test/test_content.html').read
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_all_methods_introduced
|
26
|
+
%w|to_nokogiri page_source download|.each do |method|
|
27
|
+
assert_respond_to "http://google.com", method
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_to_nokogiri
|
32
|
+
assert_instance_of Nokogiri::HTML::Document, HTML.to_nokogiri
|
33
|
+
assert_equal LYNX_TAGLINE, LYNX.to_nokogiri.xpath('//title/text()').to_s
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_conversion_to_nodeset
|
37
|
+
assert_equal HTML.to_nokogiri.css(CSS_QUERY).to_html, CSS_RESULT
|
38
|
+
assert_equal HTML.to_nokogiri.css(XPATH_QUERY).to_html, XPATH_RESULT
|
39
|
+
assert_equal HTML.to_nokogiri.css(XPATH_QUERY).xpath("//title/text()").to_html, "How You Know"
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_page_source
|
43
|
+
assert_equal SIMPLE_PAGE_URL.page_source.to_nokogiri.xpath("//address/a/text()").to_html, "aito@fw.ipsj.or.jp"
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_css
|
47
|
+
assert_respond_to "", :css
|
48
|
+
assert_equal HTML.to_nokogiri.css(CSS_QUERY).to_html, HTML.css(CSS_QUERY).to_html
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_xpath
|
52
|
+
assert_respond_to "", :xpath
|
53
|
+
assert_equal HTML.to_nokogiri.xpath(XPATH_QUERY).to_html, HTML.xpath(XPATH_QUERY).to_html
|
54
|
+
end
|
55
|
+
end
|
data/xp.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = "xp"
|
6
|
+
s.version = "2.0.0"
|
7
|
+
s.authors = ["Jikku Jose"]
|
8
|
+
s.email = ['jikkujose@gmail.com']
|
9
|
+
s.summary = "Gem that enables String class to help quick & dirty scraping tasks."
|
10
|
+
s.description = "Provides a monkey patched String class that can download & filter web pages using CSS/XPATH selectors; also has a very intuitive method to download files directly from their urls."
|
11
|
+
s.homepage = "http://github.com/JikkuJose/xp"
|
12
|
+
s.license = "MIT"
|
13
|
+
s.files = `git ls-files -z`.split("\x0")
|
14
|
+
s.executables = ["xp"]
|
15
|
+
|
16
|
+
s.add_dependency "nokogiri", '~> 1.6'
|
17
|
+
end
|
metadata
CHANGED
@@ -1,37 +1,61 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jikku Jose
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-12-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.6'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
27
|
-
description:
|
28
|
-
|
26
|
+
version: '1.6'
|
27
|
+
description: Provides a monkey patched String class that can download & filter web
|
28
|
+
pages using CSS/XPATH selectors; also has a very intuitive method to download files
|
29
|
+
directly from their urls.
|
30
|
+
email:
|
31
|
+
- jikkujose@gmail.com
|
29
32
|
executables:
|
30
33
|
- xp
|
31
34
|
extensions: []
|
32
35
|
extra_rdoc_files: []
|
33
36
|
files:
|
37
|
+
- .gitignore
|
38
|
+
- Gemfile
|
39
|
+
- Gemfile.lock
|
40
|
+
- LICENSE.txt
|
41
|
+
- README.markdown
|
42
|
+
- Rakefile
|
34
43
|
- bin/xp
|
44
|
+
- lib/user_agents.rb
|
45
|
+
- lib/xp.rb
|
46
|
+
- pkg/xp-0.0.1.gem
|
47
|
+
- pkg/xp-0.0.2.gem
|
48
|
+
- pkg/xp-1.0.0.gem
|
49
|
+
- research/attribute_extractor.rb
|
50
|
+
- research/bb_parser.rb
|
51
|
+
- research/interface.rb
|
52
|
+
- research/load.rb
|
53
|
+
- research/selector.rb
|
54
|
+
- research/y.htm
|
55
|
+
- test/test_content.html
|
56
|
+
- test/test_helper.rb
|
57
|
+
- test/xp_test.rb
|
58
|
+
- xp.gemspec
|
35
59
|
homepage: http://github.com/JikkuJose/xp
|
36
60
|
licenses:
|
37
61
|
- MIT
|
@@ -55,5 +79,5 @@ rubyforge_project:
|
|
55
79
|
rubygems_version: 2.2.2
|
56
80
|
signing_key:
|
57
81
|
specification_version: 4
|
58
|
-
summary:
|
82
|
+
summary: Gem that enables String class to help quick & dirty scraping tasks.
|
59
83
|
test_files: []
|