xp 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +16 -0
- data/LICENSE.txt +22 -0
- data/README.markdown +38 -0
- data/Rakefile +1 -0
- data/bin/xp +13 -12
- data/lib/user_agents.rb +10 -0
- data/lib/xp.rb +55 -0
- data/pkg/xp-0.0.1.gem +0 -0
- data/pkg/xp-0.0.2.gem +0 -0
- data/pkg/xp-1.0.0.gem +0 -0
- data/research/attribute_extractor.rb +7 -0
- data/research/bb_parser.rb +8 -0
- data/research/interface.rb +7 -0
- data/research/load.rb +1 -0
- data/research/selector.rb +10 -0
- data/research/y.htm +4452 -0
- data/test/test_content.html +122 -0
- data/test/test_helper.rb +6 -0
- data/test/xp_test.rb +55 -0
- data/xp.gemspec +17 -0
- metadata +33 -9
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
|
2
|
+
<html class="evernote_clearly__before_visible evernote_clearly__is_visible"><head>
|
|
3
|
+
<meta http-equiv="content-type" content="text/html; charset=windows-1252"><script src="test_content_files/fa27bb6ce937aea400cc8e5f11aa42d5.js" async="" type="text/javascript"></script><script type="text/javascript">
|
|
4
|
+
<!--
|
|
5
|
+
(new Image).src="http://store.yahoo.net/cgi-bin/refsd?e=http://paulgraham.com/know.html&h=paulgraham.com&v=1.0&dr=" + escape(document.referrer);
|
|
6
|
+
-->
|
|
7
|
+
</script>
|
|
8
|
+
<title>How You Know</title><!-- <META NAME="ROBOTS" CONTENT="NOODP"> -->
|
|
9
|
+
<link rel="shortcut icon" href="http://ycombinator.com/arc/arc.png">
|
|
10
|
+
<style type="text/css"></style></head><body class="evernote_clearly__before_visible evernote_clearly__is_visible" bgcolor="ffffff" background="test_content_files/paulgraham_2271_0.gif" text="000000" vlink="464646" link="000099"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr valign="top"><td><map name="10fe3548f29bf95"><area shape="rect" coords="0,0,67,21" href="http://paulgraham.com/index.html"><area shape="rect" coords="0,21,67,42" href="http://paulgraham.com/articles.html"><area shape="rect" coords="0,42,67,63" href="http://www.amazon.com/gp/product/0596006624"><area shape="rect" coords="0,63,67,84" href="http://paulgraham.com/books.html"><area shape="rect" coords="0,84,67,105" href="http://ycombinator.com/"><area shape="rect" coords="0,105,67,126" href="http://startupschool.org/"><area shape="rect" coords="0,126,67,147" href="http://paulgraham.com/arc.html"><area shape="rect" coords="0,147,67,168" href="http://paulgraham.com/lisp.html"><area shape="rect" coords="0,168,67,189" href="http://paulgraham.com/antispam.html"><area shape="rect" coords="0,189,67,210" href="http://paulgraham.com/kedrosky.html"><area shape="rect" coords="0,210,67,231" href="http://paulgraham.com/faq.html"><area shape="rect" coords="0,231,67,252" href="http://paulgraham.com/raq.html"><area shape="rect" coords="0,252,67,273" href="http://paulgraham.com/quo.html"><area shape="rect" coords="0,273,67,294" href="http://paulgraham.com/rss.html"><area shape="rect" coords="0,294,67,315" href="http://paulgraham.com/bio.html"><area shape="rect" coords="0,315,67,336" href="https://twitter.com/paulg"><area shape="rect" coords="0,336,67,357" href="http://paulgraham.com/nsearch.html"><area shape="rect" coords="0,357,67,378" href="http://paulgraham.com/ind.html"></map><img src="test_content_files/essays-1.gif" usemap="#10fe3548f29bf95" ismap="ismap" border="0" height="378" hspace="0" vspace="0" width="69"></td><td><img src="test_content_files/trans_1x1.gif" border="0" height="1" 
width="26"></td><td><a href="http://paulgraham.com/index.html"><img src="test_content_files/paulgraham_2271_3232.gif" border="0" height="45" hspace="0" vspace="0" width="410"></a><br><br><table border="0" cellpadding="0" cellspacing="0" width="435"><tbody><tr valign="top"><td width="435"><img src="test_content_files/how-you-know-1.gif" alt="How You Know" border="0" height="18" hspace="0" vspace="0" width="120"><br><br><font face="verdana" size="2">December 2014<br><br>I've read Villehardouin's chronicle of the Fourth Crusade at least
|
|
11
|
+
two times, maybe three. And yet if I had to write down everything
|
|
12
|
+
I remember from it, I doubt it would amount to much more than a
|
|
13
|
+
page. Multiply this times several hundred, and I get an uneasy
|
|
14
|
+
feeling when I look at my bookshelves. What use is it to read all
|
|
15
|
+
these books if I remember so little from them?<br><br>A few months ago, as I was reading Constance Reid's excellent
|
|
16
|
+
biography of Hilbert, I figured out if not the answer to this
|
|
17
|
+
question, at least something that made me feel better about it.
|
|
18
|
+
She writes:
|
|
19
|
+
<blockquote>
|
|
20
|
+
Hilbert had no patience with mathematical lectures which filled
|
|
21
|
+
the students with facts but did not teach them how to frame a
|
|
22
|
+
problem and solve it. He often used to tell them that "a perfect
|
|
23
|
+
formulation of a problem is already half its solution."
|
|
24
|
+
</blockquote>
|
|
25
|
+
That has always seemed to me an important point, and I was even
|
|
26
|
+
more convinced of it after hearing it confirmed by Hilbert.<br><br>But how had I come to believe in this idea in the first place? A
|
|
27
|
+
combination of my own experience and other things I'd read. None
|
|
28
|
+
of which I could at that moment remember! And eventually I'd forget
|
|
29
|
+
that Hilbert had confirmed it too. But my increased belief in the
|
|
30
|
+
importance of this idea would remain something I'd learned from
|
|
31
|
+
this book, even after I'd forgotten I'd learned it.<br><br>Reading and experience train your model of the world. And even if
|
|
32
|
+
you forget the experience or what you read, its effect on your model
|
|
33
|
+
of the world persists. Your mind is like a compiled program you've
|
|
34
|
+
lost the source of. It works, but you don't know why.<br><br>The place to look for what I learned from Villehardouin's chronicle
|
|
35
|
+
is not what I remember from it, but my mental models of the crusades,
|
|
36
|
+
Venice, medieval culture, siege warfare, and so on. Which doesn't
|
|
37
|
+
mean I couldn't have read more attentively, but at least the harvest
|
|
38
|
+
of reading is not so miserably small as it might seem.<br><br>This is one of those things that seem obvious in retrospect. But
|
|
39
|
+
it was a surprise to me and presumably would be to anyone else who
|
|
40
|
+
felt uneasy about (apparently) forgetting so much they'd read.<br><br>Realizing it does more than make you feel a little better about
|
|
41
|
+
forgetting, though. There are specific implications.<br><br>For example, reading and experience are usually "compiled" at the
|
|
42
|
+
time they happen, using the state of your brain at that time. The
|
|
43
|
+
same book would get compiled differently at different points in
|
|
44
|
+
your life. Which means it is very much worth reading important
|
|
45
|
+
books multiple times. I always used to feel some misgivings about
|
|
46
|
+
rereading books. I unconsciously lumped reading together with work
|
|
47
|
+
like carpentry, where having to do something again is a sign you
|
|
48
|
+
did it wrong the first time. Whereas now the phrase "already read"
|
|
49
|
+
seems almost ill-formed.<br><br>Intriguingly, this implication isn't limited to books. Technology
|
|
50
|
+
will increasingly make it possible to relive our experiences. When
|
|
51
|
+
people do that today it's usually to enjoy them again (e.g. when
|
|
52
|
+
looking at pictures of a trip) or to find the origin of some bug in
|
|
53
|
+
their compiled code (e.g. when Stephen Fry succeeded in remembering
|
|
54
|
+
the childhood trauma that prevented him from singing). But as
|
|
55
|
+
technologies for recording and playing back your life improve, it
|
|
56
|
+
may become common for people to relive experiences without any goal
|
|
57
|
+
in mind, simply to learn from them again as one might when rereading
|
|
58
|
+
a book.<br><br>Eventually we may be able not just to play back experiences but
|
|
59
|
+
also to index and even edit them. So although not knowing how you
|
|
60
|
+
know things may seem part of being human, it may not be.<br><br><br><br><br><br><br><br>
|
|
61
|
+
<b>Thanks</b> to Sam Altman, Jessica Livingston, and Robert Morris for reading
|
|
62
|
+
drafts of this.<br><br></font></td></tr></tbody></table><table border="0" cellpadding="0" cellspacing="0" width="435"><tbody><tr><td><font face="verdana" size="2"><br><br><hr></font></td></tr></tbody></table></td></tr></tbody></table>
|
|
63
|
+
<script type="text/javascript">
|
|
64
|
+
csell_env = 'bf1';
|
|
65
|
+
</script>
|
|
66
|
+
<script type="text/javascript">
|
|
67
|
+
// Begin Yahoo Store Generated Code
|
|
68
|
+
</script> <script type="text/javascript" src="test_content_files/ylc_1.js"></script> <script type="text/javascript" src="test_content_files/beacon-1.js">
|
|
69
|
+
</script>
|
|
70
|
+
<script type="text/javascript">
|
|
71
|
+
// Begin Yahoo Store Generated Code
|
|
72
|
+
csell_page_data = {}; csell_page_rec_data = []; ts='TOK_STORE_ID';
|
|
73
|
+
</script>
|
|
74
|
+
<script type="text/javascript">
|
|
75
|
+
// Begin Yahoo Store Generated Code
|
|
76
|
+
function csell_GLOBAL_INIT_TAG() { var csell_token_map = {}; csell_token_map['TOK_ITEM_ID_LIST'] = 'know'; csell_token_map['TOK_BEACON_TYPE'] = 'prod'; csell_token_map['TOK_RAND_KEY'] = 't'; csell_token_map['TOK_SPACEID'] = '2022276099'; csell_token_map['TOK_IS_ORDERABLE'] = '2'; csell_token_map['TOK_STORE_ID'] = 'paulgraham'; csell_token_map['TOK_URL'] = 'http://geo.yahoo.com'; csell_token_map['TOK_ORDER_HOST'] = 'order.store.yahoo.net'; c = csell_page_data; var t = csell_token_map; c['s'] = t['TOK_SPACEID']; c['url'] = t['TOK_URL']; c['si'] = t[ts]; c['ii'] = t['TOK_ITEM_ID_LIST']; c['bt'] = t['TOK_BEACON_TYPE']; c['rnd'] = t['TOK_RAND_KEY']; c['io'] = t['TOK_IS_ORDERABLE']; YStore.addItemUrl = 'http%s://'+t['TOK_ORDER_HOST']+'/'+t[ts]+'/ymix/MetaController.html?eventName.addEvent&cartDS.shoppingcart_ROW0_m_orderItemVector_ROW0_m_itemId=%s&cartDS.shoppingcart_ROW0_m_orderItemVector_ROW0_m_quantity=1&ysco_key_cs_item=1&sectionId=ysco.cart&ysco_key_store_id='+t[ts]; }
|
|
77
|
+
</script>
|
|
78
|
+
<script type="text/javascript">
|
|
79
|
+
// Begin Yahoo Store Generated Code
|
|
80
|
+
function csell_REC_VIEW_TAG() { var env = (typeof csell_env == 'string')?csell_env:'prod'; var p = csell_page_data; var a = '/sid='+p['si']+'/io='+p['io']+'/ii='+p['ii']+'/bt='+p['bt']+'-view'+'/en='+env; var r=Math.random(); YStore.CrossSellBeacon.renderBeaconWithRecData(p['url']+'/p/s='+p['s']+'/'+p['rnd']+'='+r+a); }
|
|
81
|
+
</script>
|
|
82
|
+
<script type="text/javascript">
|
|
83
|
+
// Begin Yahoo Store Generated Code
|
|
84
|
+
var csell_token_map = {}; csell_token_map['TOK_PAGE'] = 'p'; csell_token_map['TOK_WS_URL'] = 'http://paulgraham.csell.store.yahoo.net/cs/recommend?itemids=know&location=p'; csell_token_map['TOK_SHOW_CS_RECS'] = 'false'; csell_token_map['TOK_CURR_SYM'] = '$'; var t = csell_token_map; csell_GLOBAL_INIT_TAG(); YStore.page = t['TOK_PAGE']; YStore.currencySymbol = t['TOK_CURR_SYM']; YStore.crossSellUrl = t['TOK_WS_URL']; YStore.showCSRecs = t['TOK_SHOW_CS_RECS']; </script> <script type="text/javascript" src="test_content_files/recs-1.js"></script> <script type="text/javascript">
|
|
85
|
+
</script>
|
|
86
|
+
<script type="text/javascript">
|
|
87
|
+
<!--
|
|
88
|
+
if (document.referrer && (document.referrer!='') && window.Image)
|
|
89
|
+
{
|
|
90
|
+
(new Image).src="http://redirect1.vip.store.yahoo.com/cgi-bin/referadd?spcl=1&et=54903f22&catalog=paulgraham&r=" + escape(document.referrer) + "&v=1"; }
|
|
91
|
+
// -->
|
|
92
|
+
</script><script type="text/javascript">
|
|
93
|
+
<!--
|
|
94
|
+
if (document.referrer && (document.referrer!='') && window.Image)
|
|
95
|
+
{
|
|
96
|
+
(new Image).src="http://redirect1.vip.store.yahoo.net/cgi-bin/referadd?spcl=1&et=54903f22&catalog=paulgraham&r=" + escape(document.referrer) + "&v=3"; }
|
|
97
|
+
// -->
|
|
98
|
+
</script><script type="text/javascript">(function (d, w) { var f = function () { var x = d.getElementsByTagName('SCRIPT')[0]; var s = d.createElement('SCRIPT'); s.type = 'text/javascript'; s.async = true; s.src = '//np.lexity.com/embed/YA/fa27bb6ce937aea400cc8e5f11aa42d5?id=06de067e4a3b'; x.parentNode.insertBefore(s, x); }; w.attachEvent ? w.attachEvent('onload',f) : w.addEventListener('load',f,false); }(document, window)); </script><div id="evernote_clearly__container"><style type="text/css" id="evernote_clearly__css_for_container">#evernote_clearly__container { position: absolute !important; width: 5px !important; height: 5px !important; top: -1000px !important; left: -1000px !important; margin: 0 !important; padding: 0 !important; border: none !important; }
|
|
99
|
+
#evernote_clearly__definitions { display: none !important; }
|
|
100
|
+
</style><div id="evernote_clearly__definitions"><div id="evernote_clearly__serialized__option__text_font">"PT Serif"</div><div id="evernote_clearly__serialized__option__text_font_header">"PT Serif"</div><div id="evernote_clearly__serialized__option__text_font_monospace">Inconsolata</div><div id="evernote_clearly__serialized__option__text_size">20px</div><div id="evernote_clearly__serialized__option__text_line_height">1.5em</div><div id="evernote_clearly__serialized__option__box_width">36em</div><div id="evernote_clearly__serialized__option__color_background">#f3f2ee</div><div id="evernote_clearly__serialized__option__color_text">#1f0909</div><div id="evernote_clearly__serialized__option__color_links">#065588</div><div id="evernote_clearly__serialized__option__text_align">normal</div><div id="evernote_clearly__serialized__option__base">theme-1</div><div id="evernote_clearly__serialized__option__footnote_links">on_print</div><div id="evernote_clearly__serialized__option__large_graphics">do_nothing</div><div id="evernote_clearly__serialized__option__custom_css">none</div><div id="evernote_clearly__serialized__var__theme">theme-1</div><div id="evernote_clearly__serialized__var__keys_activation">Control + Command + Right Arrow</div><div id="evernote_clearly__serialized__var__keys_clip">Control + Command + Up Arrow</div><div id="evernote_clearly__serialized__var__keys_highlight">Control + Command + H</div><div id="evernote_clearly__serialized__var__keys_speech">Control + Command + S</div><div id="evernote_clearly__serialized__var__clip_tag">none</div><div id="evernote_clearly__serialized__var__clip_notebook">none</div><div id="evernote_clearly__serialized__var__clip_notebook_guid">none</div><div id="evernote_clearly__serialized__var__related_notes">enabled</div><div id="evernote_clearly__serialized__var__smart_filing">enabled</div><div id="evernote_clearly__serialized__var__smart_filing_for_business">disabled</div><div 
id="evernote_clearly__serialized__var__speech_speed">normal</div><div id="evernote_clearly__serialized__var__speech_gender">default</div><div id="evernote_clearly__serialized__var__open_notes_in">web</div><div id="evernote_clearly__serialized__var__custom_theme_options">none</div><div id="evernote_clearly__serialized__translation__menu__close__tooltip">Hide the overlay.</div><div id="evernote_clearly__serialized__translation__menu__clip_to_evernote__tooltip">Clip to Evernote.</div><div id="evernote_clearly__serialized__translation__menu__highlight_to_evernote__tooltip">Highlight.</div><div id="evernote_clearly__serialized__translation__menu__print__tooltip">Print.</div><div id="evernote_clearly__serialized__translation__menu__settings__tooltip">Show Themes.</div><div id="evernote_clearly__serialized__translation__menu__fitts__tooltip">Hide the overlay.</div><div id="evernote_clearly__serialized__translation__menu__speak__tooltip">Text To Speech</div><div id="evernote_clearly__serialized__translation__menu__speak__play__tooltip">Play</div><div id="evernote_clearly__serialized__translation__menu__speak__pause__tooltip">Pause</div><div id="evernote_clearly__serialized__translation__menu__speak__forward__tooltip">Go Forwards</div><div id="evernote_clearly__serialized__translation__menu__speak__rewind__tooltip">Go Backwards</div><div id="evernote_clearly__serialized__translation__rtl__main__label">Text direction?</div><div id="evernote_clearly__serialized__translation__rtl__ltr__label">Left-to-right</div><div id="evernote_clearly__serialized__translation__rtl__rtl__label">Right-to-left</div><div id="evernote_clearly__serialized__translation__blank_error__heading">Tips for using Evernote Clearly</div><div id="evernote_clearly__serialized__translation__blank_error__body">Clearly
|
|
101
|
+
is currently designed to work on article pages. An article page is any
|
|
102
|
+
page that contains one large block of text -- like, for example, a
|
|
103
|
+
newspaper article or blog post.</div><div id="evernote_clearly__serialized__translation__related_notes__title">Related Notes</div><div id="evernote_clearly__serialized__translation__related_notes__disable_short">Disable?</div><div id="evernote_clearly__serialized__translation__related_notes__disable_long">Do you want to disable Related Notes?</div><div id="evernote_clearly__serialized__translation__filing_info__title_normal">Filed in:</div><div id="evernote_clearly__serialized__translation__filing_info__title_smart">Smart Filed in:</div><div id="evernote_clearly__serialized__translation__filing_info__default_notebook">Your default Notebook</div><div id="evernote_clearly__serialized__translation__filing_info__view">View</div><div id="evernote_clearly__serialized__translation__filing_info__edit">Edit</div><div id="evernote_clearly__serialized__translation__filing_info__sentence">Clipped into the [=notebook] notebook, and tagged with [=tags].</div><div id="evernote_clearly__serialized__translation__filing_info__sentence_no_tags">Clipped into the [=notebook] notebook.</div><div id="evernote_clearly__serialized__translation__filing_info__sentence_and">and</div><div id="evernote_clearly__serialized__translation__filing_info__sentence_other_tags">other tags</div><div id="evernote_clearly__serialized__translation__evernote_clipping">Clipping...</div><div id="evernote_clearly__serialized__translation__evernote_clipping_failed">Clipping failed.</div><div id="evernote_clearly__serialized__translation__evernote_login__request">To sign in to Evernote, please click the Clearly icon in Chrome's toolbar.</div><div id="evernote_clearly__serialized__translation__evernote_login__heading">Sign in to Evernote</div><div id="evernote_clearly__serialized__translation__evernote_login__spinner">Signing in to Evernote</div><div id="evernote_clearly__serialized__translation__evernote_login__create_account">Create an account</div><div 
id="evernote_clearly__serialized__translation__evernote_login__button_do__label">Sign in</div><div id="evernote_clearly__serialized__translation__evernote_login__button_cancel__label">Cancel</div><div id="evernote_clearly__serialized__translation__evernote_login__username__label">Evernote Username or Email Address</div><div id="evernote_clearly__serialized__translation__evernote_login__password__label">Password</div><div id="evernote_clearly__serialized__translation__evernote_login__rememberMe__label">Remember me</div><div id="evernote_clearly__serialized__translation__evernote_login__username__error__required">Username is required.</div><div id="evernote_clearly__serialized__translation__evernote_login__username__error__length">Username must be between 1 and 64 characters long.</div><div id="evernote_clearly__serialized__translation__evernote_login__username__error__format">Username contains bad characters.</div><div id="evernote_clearly__serialized__translation__evernote_login__username__error__invalid">Not a valid, active user.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__required">Password is required.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__length">Password must be between 6 and 64 characters long.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__format">Password contains bad characters.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__invalid">Username and password do not match existing user.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__timeout">Login session timed-out. Please try again.</div><div id="evernote_clearly__serialized__translation__evernote_login__password__error__reset">Your password has expired. 
Please reset it now.</div><div id="evernote_clearly__serialized__translation__evernote_login__general__error">Authentication failed.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__message__sms">We sent a text message with a verification code to</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__message__google">Enter the verification code displayed in your Google Authenticator app.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__label">Six-digit code</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__error__required">Verification code is required.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__error__length">Verification code should be at least 6 characters long.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__error__format">Verification code should be only numbers.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__code__error__invalid">Verification code is incorrect.</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__button_do__label">Continue</div><div id="evernote_clearly__serialized__translation__evernote_two_factor__button_help__label">I need help getting a verification code</div><div id="evernote_clearly__serialized__translation__settings__theme__1__not_translated">Newsprint</div><div id="evernote_clearly__serialized__translation__settings__theme__2__not_translated">Notable</div><div id="evernote_clearly__serialized__translation__settings__theme__3__not_translated">Night Owl</div><div id="evernote_clearly__serialized__translation__settings__theme__1">Newsprint</div><div id="evernote_clearly__serialized__translation__settings__theme__2">Notable</div><div id="evernote_clearly__serialized__translation__settings__theme__3">Night Owl</div><div 
id="evernote_clearly__serialized__translation__settings__theme__custom">Custom</div><div id="evernote_clearly__serialized__translation__settings__fontSize__small">small</div><div id="evernote_clearly__serialized__translation__settings__fontSize__medium">medium</div><div id="evernote_clearly__serialized__translation__settings__fontSize__large">large</div><div id="evernote_clearly__serialized__translation__features__title__new">You have a new version of Evernote Clearly</div><div id="evernote_clearly__serialized__translation__features__title__all">Welcome to the new Evernote Clearly</div><div id="evernote_clearly__serialized__translation__features__speech__title">Text To Speech</div><div id="evernote_clearly__serialized__translation__features__speech__text">Sit
|
|
104
|
+
back and let Clearly read blog posts, articles, and web pages to you
|
|
105
|
+
thanks to the new Text To Speech feature, available exclusively for
|
|
106
|
+
Evernote Premium subscribers.</div><div id="evernote_clearly__serialized__translation__features__speech__text__powered">Evernote Clearly is powered by [=service].</div><div id="evernote_clearly__serialized__translation__features__speech__text__requires">Requires [=product].</div><div id="evernote_clearly__serialized__translation__features__speech__text__available">Text To Speech in 21 languages:</div><div id="evernote_clearly__serialized__translation__features__speech__text__available_languages">English,
|
|
107
|
+
Japanese, Spanish, French, German, Chinese, Korean, Arabic, Czech,
|
|
108
|
+
Danish, Dutch, Finnish, Greek, Hungarian, Italian, Norwegian, Polish,
|
|
109
|
+
Portuguese, Russian, Swedish and Turkish.</div><div id="evernote_clearly__serialized__translation__features__speech__text__try">Try Text To Speech</div><div id="evernote_clearly__serialized__translation__features__speech__text__upgrade">Upgrade to Evernote Premium</div><div id="evernote_clearly__serialized__translation__features__speech__text__language">Language not supported</div><div id="evernote_clearly__serialized__translation__features__speech__text__play">Play using this language</div><div id="evernote_clearly__serialized__translation__features__speech__text__cancel">Cancel</div><div id="evernote_clearly__serialized__translation__features__speech__no_language_title">Language not supported</div><div id="evernote_clearly__serialized__translation__features__speech__no_language_explanation">Evernote
|
|
110
|
+
Clearly was not able to determine the language of this article. If you
|
|
111
|
+
recognize the language, select it below and we'll play it.</div><div id="evernote_clearly__serialized__translation__features__clipping__title">Clip to Evernote</div><div id="evernote_clearly__serialized__translation__features__clipping__text">Save what you're reading to your Evernote account with one click. Access clips from any device, anytime in Evernote.</div><div id="evernote_clearly__serialized__translation__features__highlighting__title">Highlighting</div><div id="evernote_clearly__serialized__translation__features__highlighting__text">Highlight
|
|
112
|
+
text you want to remember & quickly find it in your Evernote
|
|
113
|
+
account. Highlighting changes you make in Clearly will be updated in
|
|
114
|
+
your Evernote account automatically.</div><div id="evernote_clearly__serialized__translation__features__related_notes__title">Related Notes</div><div id="evernote_clearly__serialized__translation__features__related_notes__text">Magically
|
|
115
|
+
rediscover notes from your Evernote account that are related to the
|
|
116
|
+
page you are viewing. Related Notes are displayed at the bottom of the
|
|
117
|
+
article or on the right side if space permits.</div><div id="evernote_clearly__serialized__translation__features__smart_filing__title">Smart Filing</div><div id="evernote_clearly__serialized__translation__features__smart_filing__text">Automatically assign tags to your Web clips and saves them to the appropriate notebook, so you don't have to.</div><div id="evernote_clearly__serialized__translation__features__eula_notice">By using Clearly, you agree to our [=eula].</div><div id="evernote_clearly__serialized__translation__features__close2">Close</div><div id="evernote_clearly__serialized__translation__misc__page">page</div></div><script class="evernote_clearly__launcher" src="test_content_files/launch.js"></script><script id="evernote_clearly__init" src="test_content_files/init.js"></script><iframe scrolling="auto" allowtransparency="true" id="evernote_clearly__controller" frameborder="0"></iframe><style type="text/css" id="evernote_clearly__css_for_reformat">html.evernote_clearly__before_visible, html > body.evernote_clearly__before_visible, body.evernote_clearly__before_visible { position: static !important; margin: 0 !important; padding: 0 !important; border: 0 !important; }
|
|
118
|
+
html.evernote_clearly__is_visible, html > body.evernote_clearly__is_visible, body.evernote_clearly__is_visible { overflow: hidden !important; overflow-x: hidden !important; overflow-y: hidden !important; }
|
|
119
|
+
html.evernote_clearly__before_visible object, html.evernote_clearly__before_visible embed, html.evernote_clearly__before_visible iframe, html > body.evernote_clearly__before_visible object, html > body.evernote_clearly__before_visible embed, html > body.evernote_clearly__before_visible iframe, body.evernote_clearly__before_visible object, body.evernote_clearly__before_visible embed, body.evernote_clearly__before_visible iframe { visibility: hidden !important; }
|
|
120
|
+
#evernote_clearly__reformat { margin: 0 !important; padding: 0 !important; border: none !important; position: absolute !important; width: 100% !important; height: 100% !important; min-height: 100% !important; top: -100%; left: -100%; z-index: 2147483647 !important; }
|
|
121
|
+
html.evernote_clearly__before_visible #evernote_clearly__reformat, html > body.evernote_clearly__before_visible #evernote_clearly__reformat, body.evernote_clearly__before_visible #evernote_clearly__reformat, #evernote_clearly__reformat { display: block !important; overflow: auto !important; visibility: visible !important; }
|
|
122
|
+
</style></div><iframe style="top: 0px; left: 0px;" scrolling="auto" allowtransparency="true" id="evernote_clearly__reformat" frameborder="0"></iframe><style type="text/css" id="clearly_next_pages_container__css">#clearly_next_pages_container { margin: 0; padding: 0; border: none; position: absolute; width: 10px; height: 10px; top: -100px; left: -100px; } #clearly_next_pages_container iframe { margin: 0; padding: 0; border: none; position: absolute; width: 10px; height: 10px; top: -100px; left: -100px; } </style><div id="clearly_next_pages_container"></div></body></html>
|
data/test/test_helper.rb
ADDED
data/test/xp_test.rb
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
require './lib/xp'
|
|
2
|
+
|
|
3
|
+
HTML = File.open('./test/test_content.html').read
|
|
4
|
+
XPATH_QUERY = '//title'
|
|
5
|
+
XPATH_RESULT = "<title>How You Know</title>\n"
|
|
6
|
+
|
|
7
|
+
CSS_QUERY = 'title'
|
|
8
|
+
CSS_RESULT = "<title>How You Know</title>\n"
|
|
9
|
+
|
|
10
|
+
LYNX = 'http://www.delorie.com/web/lynxview.html'
|
|
11
|
+
LYNX_TAGLINE = 'Lynx Viewer'
|
|
12
|
+
SIMPLE_PAGE_URL = "http://w3m.sourceforge.net/"
|
|
13
|
+
TEST_URL = 'http://qucentis.com/unavailable_404_link'
|
|
14
|
+
TEST_FILE_URL = 'http://abc.com/file/hello.mov?key=123&id=569'
|
|
15
|
+
TEST_FILE_BASENAME = 'hello'
|
|
16
|
+
TEST_FILE_EXTENSION = '.mov'
|
|
17
|
+
|
|
18
|
+
class TestXP < Minitest::Test
|
|
19
|
+
String.send(:include, XP)
|
|
20
|
+
|
|
21
|
+
def setup
|
|
22
|
+
@html = File.open('./test/test_content.html').read
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_all_methods_introduced
|
|
26
|
+
%w|to_nokogiri page_source download|.each do |method|
|
|
27
|
+
assert_respond_to "http://google.com", method
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def test_to_nokogiri
|
|
32
|
+
assert_instance_of Nokogiri::HTML::Document, HTML.to_nokogiri
|
|
33
|
+
assert_equal LYNX_TAGLINE, LYNX.to_nokogiri.xpath('//title/text()').to_s
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def test_conversion_to_nodeset
|
|
37
|
+
assert_equal HTML.to_nokogiri.css(CSS_QUERY).to_html, CSS_RESULT
|
|
38
|
+
assert_equal HTML.to_nokogiri.css(XPATH_QUERY).to_html, XPATH_RESULT
|
|
39
|
+
assert_equal HTML.to_nokogiri.css(XPATH_QUERY).xpath("//title/text()").to_html, "How You Know"
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def test_page_source
|
|
43
|
+
assert_equal SIMPLE_PAGE_URL.page_source.to_nokogiri.xpath("//address/a/text()").to_html, "aito@fw.ipsj.or.jp"
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def test_css
|
|
47
|
+
assert_respond_to "", :css
|
|
48
|
+
assert_equal HTML.to_nokogiri.css(CSS_QUERY).to_html, HTML.css(CSS_QUERY).to_html
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def test_xpath
|
|
52
|
+
assert_respond_to "", :xpath
|
|
53
|
+
assert_equal HTML.to_nokogiri.xpath(XPATH_QUERY).to_html, HTML.xpath(XPATH_QUERY).to_html
|
|
54
|
+
end
|
|
55
|
+
end
|
data/xp.gemspec
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
|
+
|
|
4
|
+
Gem::Specification.new do |s|
|
|
5
|
+
s.name = "xp"
|
|
6
|
+
s.version = "2.0.0"
|
|
7
|
+
s.authors = ["Jikku Jose"]
|
|
8
|
+
s.email = ['jikkujose@gmail.com']
|
|
9
|
+
s.summary = "Gem that enables String class to help quick & dirty scraping tasks."
|
|
10
|
+
s.description = "Provides a monkey patched String class that can download & filter web pages using CSS/XPATH selectors; also has a very intuitive method to download files directly from their urls."
|
|
11
|
+
s.homepage = "http://github.com/JikkuJose/xp"
|
|
12
|
+
s.license = "MIT"
|
|
13
|
+
s.files = `git ls-files -z`.split("\x0")
|
|
14
|
+
s.executables = ["xp"]
|
|
15
|
+
|
|
16
|
+
s.add_dependency "nokogiri", '~> 1.6'
|
|
17
|
+
end
|
metadata
CHANGED
|
@@ -1,37 +1,61 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xp
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jikku Jose
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-
|
|
11
|
+
date: 2014-12-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- -
|
|
17
|
+
- - ~>
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '
|
|
19
|
+
version: '1.6'
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- -
|
|
24
|
+
- - ~>
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '
|
|
27
|
-
description:
|
|
28
|
-
|
|
26
|
+
version: '1.6'
|
|
27
|
+
description: Provides a monkey patched String class that can download & filter web
|
|
28
|
+
pages using CSS/XPATH selectors; also has a very intuitive method to download files
|
|
29
|
+
directly from their urls.
|
|
30
|
+
email:
|
|
31
|
+
- jikkujose@gmail.com
|
|
29
32
|
executables:
|
|
30
33
|
- xp
|
|
31
34
|
extensions: []
|
|
32
35
|
extra_rdoc_files: []
|
|
33
36
|
files:
|
|
37
|
+
- .gitignore
|
|
38
|
+
- Gemfile
|
|
39
|
+
- Gemfile.lock
|
|
40
|
+
- LICENSE.txt
|
|
41
|
+
- README.markdown
|
|
42
|
+
- Rakefile
|
|
34
43
|
- bin/xp
|
|
44
|
+
- lib/user_agents.rb
|
|
45
|
+
- lib/xp.rb
|
|
46
|
+
- pkg/xp-0.0.1.gem
|
|
47
|
+
- pkg/xp-0.0.2.gem
|
|
48
|
+
- pkg/xp-1.0.0.gem
|
|
49
|
+
- research/attribute_extractor.rb
|
|
50
|
+
- research/bb_parser.rb
|
|
51
|
+
- research/interface.rb
|
|
52
|
+
- research/load.rb
|
|
53
|
+
- research/selector.rb
|
|
54
|
+
- research/y.htm
|
|
55
|
+
- test/test_content.html
|
|
56
|
+
- test/test_helper.rb
|
|
57
|
+
- test/xp_test.rb
|
|
58
|
+
- xp.gemspec
|
|
35
59
|
homepage: http://github.com/JikkuJose/xp
|
|
36
60
|
licenses:
|
|
37
61
|
- MIT
|
|
@@ -55,5 +79,5 @@ rubyforge_project:
|
|
|
55
79
|
rubygems_version: 2.2.2
|
|
56
80
|
signing_key:
|
|
57
81
|
specification_version: 4
|
|
58
|
-
summary:
|
|
82
|
+
summary: Gem that enables String class to help quick & dirty scraping tasks.
|
|
59
83
|
test_files: []
|