ruby_tika_app 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +5 -2
- data/{README.textile → README.md} +22 -15
- data/lib/ruby_tika_app.rb +5 -1
- data/ruby_tika_app.gemspec +5 -2
- data/spec/docs/cnn.com +1473 -0
- data/spec/docs/news.ycombinator.com +24 -0
- data/spec/ruby_tika_app_spec.rb +20 -1
- data/spec/spec_helper.rb +10 -0
- data/spec/support/test_server.rb +23 -0
- metadata +57 -3
@@ -0,0 +1,24 @@
|
|
1
|
+
<html><head><link rel="stylesheet" type="text/css" href="news.css">
|
2
|
+
<link rel="shortcut icon" href="favicon.ico">
|
3
|
+
<script type="text/javascript">
|
4
|
+
function byId(id) {
|
5
|
+
return document.getElementById(id);
|
6
|
+
}
|
7
|
+
|
8
|
+
function vote(node) {
|
9
|
+
var v = node.id.split(/_/); // {'up', '123'}
|
10
|
+
var item = v[1];
|
11
|
+
|
12
|
+
// hide arrows
|
13
|
+
byId('up_' + item).style.visibility = 'hidden';
|
14
|
+
byId('down_' + item).style.visibility = 'hidden';
|
15
|
+
|
16
|
+
// ping server
|
17
|
+
var ping = new Image();
|
18
|
+
ping.src = node.href;
|
19
|
+
|
20
|
+
return false; // cancel browser nav
|
21
|
+
} </script><title>Hacker News</title></head><body><center><table border=0 cellpadding=0 cellspacing=0 width="85%" bgcolor=#f6f6ef><tr><td bgcolor=#ff6600><table border=0 cellpadding=0 cellspacing=0 width="100%" style="padding:2px"><tr><td style="width:18px;padding-right:4px"><a href="http://ycombinator.com"><img src="y18.gif" width=18 height=18 style="border:1px #ffffff solid;"></img></a></td><td style="line-height:12pt; height:10px;"><span class="pagetop"><b><a href="news">Hacker News</a></b><img src="s.gif" height=1 width=10><a href="newest">new</a> | <a href="newcomments">comments</a> | <a href="ask">ask</a> | <a href="jobs">jobs</a> | <a href="submit">submit</a></span></td><td style="text-align:right;padding-right:4px;"><span class="pagetop"><a href="newslogin?whence=%6e%65%77%73">login</a></span></td></tr></table></td></tr><tr style="height:10px"></tr><tr><td><table border=0 cellpadding=0 cellspacing=0><tr><td align=right valign=top class="title">1.</td><td><center><a id=up_5674230 href="vote?for=5674230&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674230></span></center></td><td class="title"><a href="http://www.motherjones.com/tom-philpott/2013/05/7-dodgy-foodag-practices-banned-europe-just-fine-here">Food Practices Banned in Europe But Allowed in the US</a><span class="comhead"> (motherjones.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674230>117 points</span> by <a href="user?id=casca">casca</a> 2 hours ago | <a href="item?id=5674230">70 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">2.</td><td><center><a id=up_5674193 href="vote?for=5674193&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674193></span></center></td><td class="title"><a href="http://news.cnet.com/8301-13578_3-57583395-38/doj-we-dont-need-warrants-for-e-mail-facebook-chats/">DOJ: We don't need warrants 
for e-mail, Facebook chats</a><span class="comhead"> (cnet.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674193>46 points</span> by <a href="user?id=declan">declan</a> 2 hours ago | <a href="item?id=5674193">22 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">3.</td><td><center><a id=up_5673628 href="vote?for=5673628&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673628></span></center></td><td class="title"><a href="http://peternixey.com/post/49928526270/dear-apple-lets-talk-about-photos">Dear Apple, let's talk about photos</a><span class="comhead"> (peternixey.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673628>206 points</span> by <a href="user?id=robheaton">robheaton</a> 4 hours ago | <a href="item?id=5673628">194 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">4.</td><td><center><a id=up_5674380 href="vote?for=5674380&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674380></span></center></td><td class="title"><a href="http://37signals.com/reportcard">37signals report card</a><span class="comhead"> (37signals.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674380>38 points</span> by <a href="user?id=wlll">wlll</a> 1 hour ago | <a href="item?id=5674380">10 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">5.</td><td><center><a id=up_5673339 href="vote?for=5673339&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673339></span></center></td><td class="title"><a href="https://planscope.io/blog/the-freelancers-guide-to-recurring-revenue/">The Freelancer's Guide to Recurring Revenue</a><span class="comhead"> (planscope.io) </span></td></tr><tr><td colspan=2></td><td 
class="subtext"><span id=score_5673339>173 points</span> by <a href="user?id=sherm8n">sherm8n</a> 6 hours ago | <a href="item?id=5673339">54 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">6.</td><td><center><a id=up_5674510 href="vote?for=5674510&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674510></span></center></td><td class="title"><a href="https://github.com/mDialog/scala-zeromq">scala-zeromq - thread-safe ZeroMQ sockets for Scala</a><span class="comhead"> (github.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674510>24 points</span> by <a href="user?id=ninjakeyboard">ninjakeyboard</a> 1 hour ago | <a href="item?id=5674510">1 comment</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">7.</td><td><center><a id=up_5674438 href="vote?for=5674438&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674438></span></center></td><td class="title"><a href="http://www.madinamerica.com/2012/02/why-anti-authoritarians-are-diagnosed-as-mentally-ill/">Why Anti-Authoritarians are Diagnosed as Mentally Ill (2012)</a><span class="comhead"> (madinamerica.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674438>24 points</span> by <a href="user?id=yesbabyyes">yesbabyyes</a> 1 hour ago | <a href="item?id=5674438">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">8.</td><td><center><a id=up_5673948 href="vote?for=5673948&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673948></span></center></td><td class="title"><a href="http://www.maths.manchester.ac.uk/~jm/Choreographies/">Planar Choreographies: odd orbital mechanics</a><span class="comhead"> (manchester.ac.uk) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span 
id=score_5673948>51 points</span> by <a href="user?id=ColinWright">ColinWright</a> 3 hours ago | <a href="item?id=5673948">13 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">9.</td><td><center><a id=up_5674838 href="vote?for=5674838&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674838></span></center></td><td class="title"><a href="http://baohaojun.github.io/blog/2013/05/08/stripping-kernel-code-for-reading.html">Stripping kernel/uboot source to 10% for code reading</a><span class="comhead"> (baohaojun.github.io) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674838>10 points</span> by <a href="user?id=baohaojun">baohaojun</a> 51 minutes ago | <a href="item?id=5674838">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">10.</td><td><center><a id=up_5673356 href="vote?for=5673356&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673356></span></center></td><td class="title"><a href="http://blog.bitops.com/blog/2013/05/01/unreal-javascript/">Unreal JavaScript</a><span class="comhead"> (bitops.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673356>100 points</span> by <a href="user?id=ndr">ndr</a> 6 hours ago | <a href="item?id=5673356">49 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">11.</td><td><center><a id=up_5673544 href="vote?for=5673544&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673544></span></center></td><td class="title"><a href="http://sidekicksrc.com/post/if-you-love-automation-why-is-your-development-environment-manual/">If you love automation, why is your development environment manual?</a><span class="comhead"> (sidekicksrc.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span 
id=score_5673544>76 points</span> by <a href="user?id=timruffles">timruffles</a> 5 hours ago | <a href="item?id=5673544">63 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">12.</td><td><center><a id=up_5674279 href="vote?for=5674279&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674279></span></center></td><td class="title"><a href="http://www.mobilesort.com/blog/ios-sale-numbers-by-app-store-rank.html">iOS Sale Numbers By App Store Rank</a><span class="comhead"> (mobilesort.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674279>17 points</span> by <a href="user?id=chrisa">chrisa</a> 2 hours ago | <a href="item?id=5674279">7 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">13.</td><td><center><a id=up_5674150 href="vote?for=5674150&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674150></span></center></td><td class="title"><a href="http://minimaxir.com/2013/05/stones-of-jordan/">Diablo III Economy Broken by an Integer Overflow Bug</a><span class="comhead"> (minimaxir.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674150>58 points</span> by <a href="user?id=minimaxir">minimaxir</a> 2 hours ago | <a href="item?id=5674150">61 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">14.</td><td><center><a id=up_5670719 href="vote?for=5670719&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5670719></span></center></td><td class="title"><a href="https://www.eff.org/deeplinks/2013/05/why-isnt-gatsby-public-domain">Why Isn't Gatsby in the Public Domain?</a><span class="comhead"> (eff.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5670719>526 points</span> by <a 
href="user?id=ninthfrank07">ninthfrank07</a> 19 hours ago | <a href="item?id=5670719">159 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">15.</td><td><center><a id=up_5672515 href="vote?for=5672515&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672515></span></center></td><td class="title"><a href="http://www.flattestroute.com/">Show HN: Flattest Route - A web app to help you avoid hills in SF</a><span class="comhead"> (flattestroute.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672515>200 points</span> by <a href="user?id=jonny_eh">jonny_eh</a> 11 hours ago | <a href="item?id=5672515">67 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">16.</td><td><center><a id=up_5674263 href="vote?for=5674263&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674263></span></center></td><td class="title"><a href="http://www.pewsocialtrends.org/2013/05/07/gun-homicide-rate-down-49-since-1993-peak-public-unaware/">Gun Homicide Rate Down 49% Since 1993 Peak; Public Unaware</a><span class="comhead"> (pewsocialtrends.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674263>141 points</span> by <a href="user?id=krg">krg</a> 2 hours ago | <a href="item?id=5674263">173 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">17.</td><td><center><a id=up_5674962 href="vote?for=5674962&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674962></span></center></td><td class="title"><a href="http://weathersignal.com/" rel="nofollow">Pocket Meteorology: Using Android Phones to Crowdsource the Weather</a><span class="comhead"> (weathersignal.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674962>5 points</span> by <a 
href="user?id=exap">exap</a> 30 minutes ago | <a href="item?id=5674962">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">18.</td><td><center><a id=up_5675038 href="vote?for=5675038&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5675038></span></center></td><td class="title"><a href="http://www.allanberger.com/post/49938151321/how-to-make-your-apps-retina-ready" rel="nofollow">How to make your Apps Retina ready</a><span class="comhead"> (allanberger.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5675038>4 points</span> by <a href="user?id=andreasklinger">andreasklinger</a> 16 minutes ago | <a href="item?id=5675038">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">19.</td><td><center><a id=up_5673480 href="vote?for=5673480&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673480></span></center></td><td class="title"><a href="http://thecodeartist.blogspot.com/2013/05/sensors-on-google-glass.html">Sensors on Google Glass</a><span class="comhead"> (thecodeartist.blogspot.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673480>44 points</span> by <a href="user?id=cvs268">cvs268</a> 5 hours ago | <a href="item?id=5673480">17 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">20.</td><td><center><a id=up_5673032 href="vote?for=5673032&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673032></span></center></td><td class="title"><a href="https://github.com/d11wtq/boris">Boris: A tiny but robust REPL for PHP</a><span class="comhead"> (github.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673032>90 points</span> by <a href="user?id=nodesocket">nodesocket</a> 8 hours ago | <a 
href="item?id=5673032">36 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">21.</td><td><center><a id=up_5674755 href="vote?for=5674755&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674755></span></center></td><td class="title"><a href="http://www.washingtonpost.com/local/education/free-online-textbooks-with-conditions/2013/05/07/b49364ce-b761-11e2-92f3-f291801936b8_story.html?hpid=z9">Coursera to offer students free online textbooks, with conditions</a><span class="comhead"> (washingtonpost.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674755>6 points</span> by <a href="user?id=sonabinu">sonabinu</a> 1 hour ago | <a href="item?id=5674755">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">22.</td><td></td><td class="title"><a href="item?id=5674229">Join us and disrupt one of the world's worst monopolies</a></td></tr><tr><td colspan=2></td><td class="subtext">2 hours ago</td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">23.</td><td><center><a id=up_5673463 href="vote?for=5673463&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673463></span></center></td><td class="title"><a href="http://phoboslab.org/log/2013/05/mpeg1-video-decoder-in-javascript">MPEG1 Video Decoder in JavaScript</a><span class="comhead"> (phoboslab.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673463>39 points</span> by <a href="user?id=phoboslab">phoboslab</a> 5 hours ago | <a href="item?id=5673463">11 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">24.</td><td><center><a id=up_5671652 href="vote?for=5671652&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5671652></span></center></td><td class="title"><a 
href="https://twitter.com/id_aa_carmack/status/331918309916295168">John Carmack starting port of Wolf 3D in Haskell</a><span class="comhead"> (twitter.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5671652>274 points</span> by <a href="user?id=bobfunk">bobfunk</a> 16 hours ago | <a href="item?id=5671652">108 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">25.</td><td><center><a id=up_5674417 href="vote?for=5674417&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674417></span></center></td><td class="title"><a href="http://www.d2.vu/">Demonoid returns, thanks to community hosting.</a><span class="comhead"> (d2.vu) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674417>7 points</span> by <a href="user?id=bichiliad">bichiliad</a> 1 hour ago | <a href="item?id=5674417">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">26.</td><td><center><a id=up_5672875 href="vote?for=5672875&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672875></span></center></td><td class="title"><a href="http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html">A regular expression matcher By Rob Pike and Brian Kernighan (2007)</a><span class="comhead"> (princeton.edu) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672875>78 points</span> by <a href="user?id=sid6376">sid6376</a> 9 hours ago | <a href="item?id=5672875">34 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">27.</td><td><center><a id=up_5672354 href="vote?for=5672354&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672354></span></center></td><td class="title"><a href="http://nemaload.davidad.org/">Show HN: My friend's project to simulate an 
entire C. Elegans</a><span class="comhead"> (davidad.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672354>127 points</span> by <a href="user?id=SlyShy">SlyShy</a> 12 hours ago | <a href="item?id=5672354">41 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">28.</td><td><center><a id=up_5674550 href="vote?for=5674550&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674550></span></center></td><td class="title"><a href="http://blog.mortardata.com/post/49934459499/recommender-systems-for-free">How to get Hilary Mason to build your recommender for free</a><span class="comhead"> (mortardata.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674550>34 points</span> by <a href="user?id=kky">kky</a> 1 hour ago | <a href="item?id=5674550">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">29.</td><td><center><a id=up_5674434 href="vote?for=5674434&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674434></span></center></td><td class="title"><a href="http://mailman.ravenbrook.com/pipermail/mps-discussion/2013-May/000128.html">First release of MPS GC with full documentation (1.111.0)</a><span class="comhead"> (ravenbrook.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674434>6 points</span> by <a href="user?id=BruceM">BruceM</a> 1 hour ago | <a href="item?id=5674434">1 comment</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">30.</td><td><center><a id=up_5668374 href="vote?for=5668374&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5668374></span></center></td><td class="title"><a href="http://jonobr1.github.io/two.js">Two.js</a><span class="comhead"> (jonobr1.github.io) </span></td></tr><tr><td 
colspan=2></td><td class="subtext"><span id=score_5668374>466 points</span> by <a href="user?id=jgv">jgv</a> 1 day ago | <a href="item?id=5668374">61 comments</a></td></tr><tr style="height:5px"></tr><tr style="height:10px"></tr><tr><td colspan=2></td><td class="title"><a href="news2">More</a></td></tr></table></td></tr><tr><td><img src="s.gif" height=10 width=0><table width="100%" cellspacing=0 cellpadding=1><tr><td bgcolor=#ff6600></td></tr></table><br>
|
22
|
+
<center><span class="yclinks"><a href="lists">Lists</a> | <a href="rss">RSS</a> | <a href="http://ycombinator.com/bookmarklet.html">Bookmarklet</a> | <a href="http://ycombinator.com/newsguidelines.html">Guidelines</a> | <a href="http://ycombinator.com/newsfaq.html">FAQ</a> | <a href="dmca.html">DMCA</a> | <a href="http://ycombinator.com/newsnews.html">News News</a> | <a href="item?id=363">Feature Requests</a> | <a href="http://ycombinator.com">Y Combinator</a> | <a href="http://ycombinator.com/apply.html">Apply</a> | <a href="http://ycombinator.com/lib.html">Library</a></span><br><br>
|
23
|
+
<form method=get action="//www.hnsearch.com/search#request/all">Search: <input type=text name="q" value="" size=17></form><br>
|
24
|
+
</center></td></tr></table></center></body></html>
|
data/spec/ruby_tika_app_spec.rb
CHANGED
@@ -3,7 +3,12 @@ require 'spec_helper'
|
|
3
3
|
describe RubyTikaApp do
|
4
4
|
|
5
5
|
before(:each) do
|
6
|
-
|
6
|
+
doc_path = "#{File.join(File.dirname(__FILE__))}/docs"
|
7
|
+
|
8
|
+
@test_file = "#{doc_path}/graph sampling simplex - 11.pdf"
|
9
|
+
|
10
|
+
@cnn_com_file = "#{doc_path}/cnn.com"
|
11
|
+
@news_ycombinator_com_file = "#{doc_path}/news.ycombinator.com"
|
7
12
|
end
|
8
13
|
|
9
14
|
describe 'Error' do
|
@@ -91,4 +96,18 @@ describe RubyTikaApp do
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
|
99
|
+
describe 'external URLs' do
|
100
|
+
it 'should be able to parse an http url' do
|
101
|
+
rta = RubyTikaApp.new("http://localhost:9299/cnn.com")
|
102
|
+
rta.to_text.should_not be_nil
|
103
|
+
rta.to_text.should eq(RubyTikaApp.new(@cnn_com_file).to_text)
|
104
|
+
end
|
105
|
+
|
106
|
+
it 'should be able to parse another http url' do
|
107
|
+
rta = RubyTikaApp.new("http://localhost:9299/news.ycombinator.com")
|
108
|
+
rta.to_text.should_not be_nil
|
109
|
+
rta.to_text.should eq(RubyTikaApp.new(@news_ycombinator_com_file).to_text)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
94
113
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -7,6 +7,16 @@ require 'bundler/setup'
|
|
7
7
|
require 'ruby_tika_app'
|
8
8
|
require 'rspec'
|
9
9
|
|
10
|
+
# Include all files under spec/support
|
11
|
+
Dir["./spec/support/**/*.rb"].each {|f| require f}
|
12
|
+
|
13
|
+
# Start a local rack server to serve up test pages.
|
14
|
+
@server_thread = Thread.new do
|
15
|
+
Rack::Handler::Thin.run MyApp::Test::Server.new, :Port => 9299
|
16
|
+
end
|
17
|
+
|
18
|
+
sleep(1) # wait a sec for the server to be booted
|
19
|
+
|
10
20
|
RSpec.configure do |config|
|
11
21
|
|
12
22
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rack'
|
3
|
+
|
4
|
+
module MyApp
|
5
|
+
module Test
|
6
|
+
class Server
|
7
|
+
def call(env)
|
8
|
+
@root = "#{File.expand_path(File.dirname(__FILE__))}/../docs/"
|
9
|
+
path = Rack::Utils.unescape(env['PATH_INFO'])
|
10
|
+
path += 'index.html' if path == '/'
|
11
|
+
file = @root + "#{path}"
|
12
|
+
|
13
|
+
params = Rack::Utils.parse_nested_query(env['QUERY_STRING'])
|
14
|
+
|
15
|
+
if File.exists?(file)
|
16
|
+
[ 200, {"Content-Type" => "text/html"}, File.read(file) ]
|
17
|
+
else
|
18
|
+
[ 404, {'Content-Type' => 'text/plain'}, 'file not found' ]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_tika_app
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-05-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: open4
|
@@ -91,6 +91,54 @@ dependencies:
|
|
91
91
|
- - ! '>='
|
92
92
|
- !ruby/object:Gem::Version
|
93
93
|
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: json
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: rack
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: thin
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
type: :development
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
94
142
|
description: Wrapper around the tika-app jar
|
95
143
|
email:
|
96
144
|
- mrcsparker@gmail.com
|
@@ -103,14 +151,17 @@ files:
|
|
103
151
|
- Gemfile
|
104
152
|
- HISTORY
|
105
153
|
- LICENSE
|
106
|
-
- README.textile
|
154
|
+
- README.md
|
107
155
|
- Rakefile
|
108
156
|
- ext/tika-app-1.2.jar
|
109
157
|
- lib/ruby_tika_app.rb
|
110
158
|
- ruby_tika_app.gemspec
|
159
|
+
- spec/docs/cnn.com
|
111
160
|
- spec/docs/graph sampling simplex - 11.pdf
|
161
|
+
- spec/docs/news.ycombinator.com
|
112
162
|
- spec/ruby_tika_app_spec.rb
|
113
163
|
- spec/spec_helper.rb
|
164
|
+
- spec/support/test_server.rb
|
114
165
|
homepage: https://github.com/mrcsparker/ruby_tika_app
|
115
166
|
licenses: []
|
116
167
|
post_install_message:
|
@@ -136,6 +187,9 @@ signing_key:
|
|
136
187
|
specification_version: 3
|
137
188
|
summary: Wrapper around the tika-app jar
|
138
189
|
test_files:
|
190
|
+
- spec/docs/cnn.com
|
139
191
|
- spec/docs/graph sampling simplex - 11.pdf
|
192
|
+
- spec/docs/news.ycombinator.com
|
140
193
|
- spec/ruby_tika_app_spec.rb
|
141
194
|
- spec/spec_helper.rb
|
195
|
+
- spec/support/test_server.rb
|