sw_ruby_tika_app 1.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/Gemfile +4 -0
- data/HISTORY +10 -0
- data/LICENSE +20 -0
- data/README.md +54 -0
- data/Rakefile +1 -0
- data/ext/tika-app-1.14.jar +0 -0
- data/lib/ruby_tika_app.rb +82 -0
- data/ruby_tika_app.gemspec +32 -0
- data/spec/docs/cnn.com +1473 -0
- data/spec/docs/graph sampling simplex - 11.pdf +0 -0
- data/spec/docs/news.ycombinator.com +24 -0
- data/spec/ruby_tika_app_spec.rb +114 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/support/test_server.rb +21 -0
- metadata +206 -0
Binary file
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<html><head><link rel="stylesheet" type="text/css" href="news.css">
|
2
|
+
<link rel="shortcut icon" href="favicon.ico">
|
3
|
+
<script type="text/javascript">
|
4
|
+
function byId(id) {
|
5
|
+
return document.getElementById(id);
|
6
|
+
}
|
7
|
+
|
8
|
+
function vote(node) {
|
9
|
+
var v = node.id.split(/_/); // {'up', '123'}
|
10
|
+
var item = v[1];
|
11
|
+
|
12
|
+
// hide arrows
|
13
|
+
byId('up_' + item).style.visibility = 'hidden';
|
14
|
+
byId('down_' + item).style.visibility = 'hidden';
|
15
|
+
|
16
|
+
// ping server
|
17
|
+
var ping = new Image();
|
18
|
+
ping.src = node.href;
|
19
|
+
|
20
|
+
return false; // cancel browser nav
|
21
|
+
} </script><title>Hacker News</title></head><body><center><table border=0 cellpadding=0 cellspacing=0 width="85%" bgcolor=#f6f6ef><tr><td bgcolor=#ff6600><table border=0 cellpadding=0 cellspacing=0 width="100%" style="padding:2px"><tr><td style="width:18px;padding-right:4px"><a href="http://ycombinator.com"><img src="y18.gif" width=18 height=18 style="border:1px #ffffff solid;"></img></a></td><td style="line-height:12pt; height:10px;"><span class="pagetop"><b><a href="news">Hacker News</a></b><img src="s.gif" height=1 width=10><a href="newest">new</a> | <a href="newcomments">comments</a> | <a href="ask">ask</a> | <a href="jobs">jobs</a> | <a href="submit">submit</a></span></td><td style="text-align:right;padding-right:4px;"><span class="pagetop"><a href="newslogin?whence=%6e%65%77%73">login</a></span></td></tr></table></td></tr><tr style="height:10px"></tr><tr><td><table border=0 cellpadding=0 cellspacing=0><tr><td align=right valign=top class="title">1.</td><td><center><a id=up_5674230 href="vote?for=5674230&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674230></span></center></td><td class="title"><a href="http://www.motherjones.com/tom-philpott/2013/05/7-dodgy-foodag-practices-banned-europe-just-fine-here">Food Practices Banned in Europe But Allowed in the US</a><span class="comhead"> (motherjones.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674230>117 points</span> by <a href="user?id=casca">casca</a> 2 hours ago | <a href="item?id=5674230">70 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">2.</td><td><center><a id=up_5674193 href="vote?for=5674193&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674193></span></center></td><td class="title"><a href="http://news.cnet.com/8301-13578_3-57583395-38/doj-we-dont-need-warrants-for-e-mail-facebook-chats/">DOJ: We don't need warrants 
for e-mail, Facebook chats</a><span class="comhead"> (cnet.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674193>46 points</span> by <a href="user?id=declan">declan</a> 2 hours ago | <a href="item?id=5674193">22 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">3.</td><td><center><a id=up_5673628 href="vote?for=5673628&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673628></span></center></td><td class="title"><a href="http://peternixey.com/post/49928526270/dear-apple-lets-talk-about-photos">Dear Apple, let's talk about photos</a><span class="comhead"> (peternixey.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673628>206 points</span> by <a href="user?id=robheaton">robheaton</a> 4 hours ago | <a href="item?id=5673628">194 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">4.</td><td><center><a id=up_5674380 href="vote?for=5674380&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674380></span></center></td><td class="title"><a href="http://37signals.com/reportcard">37signals report card</a><span class="comhead"> (37signals.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674380>38 points</span> by <a href="user?id=wlll">wlll</a> 1 hour ago | <a href="item?id=5674380">10 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">5.</td><td><center><a id=up_5673339 href="vote?for=5673339&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673339></span></center></td><td class="title"><a href="https://planscope.io/blog/the-freelancers-guide-to-recurring-revenue/">The Freelancer's Guide to Recurring Revenue</a><span class="comhead"> (planscope.io) </span></td></tr><tr><td colspan=2></td><td 
class="subtext"><span id=score_5673339>173 points</span> by <a href="user?id=sherm8n">sherm8n</a> 6 hours ago | <a href="item?id=5673339">54 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">6.</td><td><center><a id=up_5674510 href="vote?for=5674510&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674510></span></center></td><td class="title"><a href="https://github.com/mDialog/scala-zeromq">scala-zeromq - thread-safe ZeroMQ sockets for Scala</a><span class="comhead"> (github.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674510>24 points</span> by <a href="user?id=ninjakeyboard">ninjakeyboard</a> 1 hour ago | <a href="item?id=5674510">1 comment</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">7.</td><td><center><a id=up_5674438 href="vote?for=5674438&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674438></span></center></td><td class="title"><a href="http://www.madinamerica.com/2012/02/why-anti-authoritarians-are-diagnosed-as-mentally-ill/">Why Anti-Authoritarians are Diagnosed as Mentally Ill (2012)</a><span class="comhead"> (madinamerica.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674438>24 points</span> by <a href="user?id=yesbabyyes">yesbabyyes</a> 1 hour ago | <a href="item?id=5674438">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">8.</td><td><center><a id=up_5673948 href="vote?for=5673948&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673948></span></center></td><td class="title"><a href="http://www.maths.manchester.ac.uk/~jm/Choreographies/">Planar Choreographies: odd orbital mechanics</a><span class="comhead"> (manchester.ac.uk) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span 
id=score_5673948>51 points</span> by <a href="user?id=ColinWright">ColinWright</a> 3 hours ago | <a href="item?id=5673948">13 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">9.</td><td><center><a id=up_5674838 href="vote?for=5674838&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674838></span></center></td><td class="title"><a href="http://baohaojun.github.io/blog/2013/05/08/stripping-kernel-code-for-reading.html">Stripping kernel/uboot source to 10% for code reading</a><span class="comhead"> (baohaojun.github.io) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674838>10 points</span> by <a href="user?id=baohaojun">baohaojun</a> 51 minutes ago | <a href="item?id=5674838">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">10.</td><td><center><a id=up_5673356 href="vote?for=5673356&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673356></span></center></td><td class="title"><a href="http://blog.bitops.com/blog/2013/05/01/unreal-javascript/">Unreal JavaScript</a><span class="comhead"> (bitops.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673356>100 points</span> by <a href="user?id=ndr">ndr</a> 6 hours ago | <a href="item?id=5673356">49 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">11.</td><td><center><a id=up_5673544 href="vote?for=5673544&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673544></span></center></td><td class="title"><a href="http://sidekicksrc.com/post/if-you-love-automation-why-is-your-development-environment-manual/">If you love automation, why is your development environment manual?</a><span class="comhead"> (sidekicksrc.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span 
id=score_5673544>76 points</span> by <a href="user?id=timruffles">timruffles</a> 5 hours ago | <a href="item?id=5673544">63 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">12.</td><td><center><a id=up_5674279 href="vote?for=5674279&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674279></span></center></td><td class="title"><a href="http://www.mobilesort.com/blog/ios-sale-numbers-by-app-store-rank.html">iOS Sale Numbers By App Store Rank</a><span class="comhead"> (mobilesort.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674279>17 points</span> by <a href="user?id=chrisa">chrisa</a> 2 hours ago | <a href="item?id=5674279">7 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">13.</td><td><center><a id=up_5674150 href="vote?for=5674150&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674150></span></center></td><td class="title"><a href="http://minimaxir.com/2013/05/stones-of-jordan/">Diablo III Economy Broken by an Integer Overflow Bug</a><span class="comhead"> (minimaxir.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674150>58 points</span> by <a href="user?id=minimaxir">minimaxir</a> 2 hours ago | <a href="item?id=5674150">61 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">14.</td><td><center><a id=up_5670719 href="vote?for=5670719&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5670719></span></center></td><td class="title"><a href="https://www.eff.org/deeplinks/2013/05/why-isnt-gatsby-public-domain">Why Isn't Gatsby in the Public Domain?</a><span class="comhead"> (eff.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5670719>526 points</span> by <a 
href="user?id=ninthfrank07">ninthfrank07</a> 19 hours ago | <a href="item?id=5670719">159 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">15.</td><td><center><a id=up_5672515 href="vote?for=5672515&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672515></span></center></td><td class="title"><a href="http://www.flattestroute.com/">Show HN: Flattest Route - A web app to help you avoid hills in SF</a><span class="comhead"> (flattestroute.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672515>200 points</span> by <a href="user?id=jonny_eh">jonny_eh</a> 11 hours ago | <a href="item?id=5672515">67 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">16.</td><td><center><a id=up_5674263 href="vote?for=5674263&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674263></span></center></td><td class="title"><a href="http://www.pewsocialtrends.org/2013/05/07/gun-homicide-rate-down-49-since-1993-peak-public-unaware/">Gun Homicide Rate Down 49% Since 1993 Peak; Public Unaware</a><span class="comhead"> (pewsocialtrends.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674263>141 points</span> by <a href="user?id=krg">krg</a> 2 hours ago | <a href="item?id=5674263">173 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">17.</td><td><center><a id=up_5674962 href="vote?for=5674962&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674962></span></center></td><td class="title"><a href="http://weathersignal.com/" rel="nofollow">Pocket Meteorology: Using Android Phones to Crowdsource the Weather</a><span class="comhead"> (weathersignal.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674962>5 points</span> by <a 
href="user?id=exap">exap</a> 30 minutes ago | <a href="item?id=5674962">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">18.</td><td><center><a id=up_5675038 href="vote?for=5675038&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5675038></span></center></td><td class="title"><a href="http://www.allanberger.com/post/49938151321/how-to-make-your-apps-retina-ready" rel="nofollow">How to make your Apps Retina ready</a><span class="comhead"> (allanberger.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5675038>4 points</span> by <a href="user?id=andreasklinger">andreasklinger</a> 16 minutes ago | <a href="item?id=5675038">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">19.</td><td><center><a id=up_5673480 href="vote?for=5673480&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673480></span></center></td><td class="title"><a href="http://thecodeartist.blogspot.com/2013/05/sensors-on-google-glass.html">Sensors on Google Glass</a><span class="comhead"> (thecodeartist.blogspot.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673480>44 points</span> by <a href="user?id=cvs268">cvs268</a> 5 hours ago | <a href="item?id=5673480">17 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">20.</td><td><center><a id=up_5673032 href="vote?for=5673032&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673032></span></center></td><td class="title"><a href="https://github.com/d11wtq/boris">Boris: A tiny but robust REPL for PHP</a><span class="comhead"> (github.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673032>90 points</span> by <a href="user?id=nodesocket">nodesocket</a> 8 hours ago | <a 
href="item?id=5673032">36 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">21.</td><td><center><a id=up_5674755 href="vote?for=5674755&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674755></span></center></td><td class="title"><a href="http://www.washingtonpost.com/local/education/free-online-textbooks-with-conditions/2013/05/07/b49364ce-b761-11e2-92f3-f291801936b8_story.html?hpid=z9">Coursera to offer students free online textbooks, with conditions</a><span class="comhead"> (washingtonpost.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674755>6 points</span> by <a href="user?id=sonabinu">sonabinu</a> 1 hour ago | <a href="item?id=5674755">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">22.</td><td></td><td class="title"><a href="item?id=5674229">Join us and disrupt one of the world's worst monopolies</a></td></tr><tr><td colspan=2></td><td class="subtext">2 hours ago</td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">23.</td><td><center><a id=up_5673463 href="vote?for=5673463&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673463></span></center></td><td class="title"><a href="http://phoboslab.org/log/2013/05/mpeg1-video-decoder-in-javascript">MPEG1 Video Decoder in JavaScript</a><span class="comhead"> (phoboslab.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673463>39 points</span> by <a href="user?id=phoboslab">phoboslab</a> 5 hours ago | <a href="item?id=5673463">11 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">24.</td><td><center><a id=up_5671652 href="vote?for=5671652&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5671652></span></center></td><td class="title"><a 
href="https://twitter.com/id_aa_carmack/status/331918309916295168">John Carmack starting port of Wolf 3D in Haskell</a><span class="comhead"> (twitter.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5671652>274 points</span> by <a href="user?id=bobfunk">bobfunk</a> 16 hours ago | <a href="item?id=5671652">108 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">25.</td><td><center><a id=up_5674417 href="vote?for=5674417&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674417></span></center></td><td class="title"><a href="http://www.d2.vu/">Demonoid returns, thanks to community hosting.</a><span class="comhead"> (d2.vu) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674417>7 points</span> by <a href="user?id=bichiliad">bichiliad</a> 1 hour ago | <a href="item?id=5674417">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">26.</td><td><center><a id=up_5672875 href="vote?for=5672875&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672875></span></center></td><td class="title"><a href="http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html">A regular expression matcher By Rob Pike and Brian Kernighan (2007)</a><span class="comhead"> (princeton.edu) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672875>78 points</span> by <a href="user?id=sid6376">sid6376</a> 9 hours ago | <a href="item?id=5672875">34 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">27.</td><td><center><a id=up_5672354 href="vote?for=5672354&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672354></span></center></td><td class="title"><a href="http://nemaload.davidad.org/">Show HN: My friend's project to simulate an 
entire C. Elegans</a><span class="comhead"> (davidad.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672354>127 points</span> by <a href="user?id=SlyShy">SlyShy</a> 12 hours ago | <a href="item?id=5672354">41 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">28.</td><td><center><a id=up_5674550 href="vote?for=5674550&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674550></span></center></td><td class="title"><a href="http://blog.mortardata.com/post/49934459499/recommender-systems-for-free">How to get Hilary Mason to build your recommender for free</a><span class="comhead"> (mortardata.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674550>34 points</span> by <a href="user?id=kky">kky</a> 1 hour ago | <a href="item?id=5674550">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">29.</td><td><center><a id=up_5674434 href="vote?for=5674434&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674434></span></center></td><td class="title"><a href="http://mailman.ravenbrook.com/pipermail/mps-discussion/2013-May/000128.html">First release of MPS GC with full documentation (1.111.0)</a><span class="comhead"> (ravenbrook.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674434>6 points</span> by <a href="user?id=BruceM">BruceM</a> 1 hour ago | <a href="item?id=5674434">1 comment</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">30.</td><td><center><a id=up_5668374 href="vote?for=5668374&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5668374></span></center></td><td class="title"><a href="http://jonobr1.github.io/two.js">Two.js</a><span class="comhead"> (jonobr1.github.io) </span></td></tr><tr><td 
colspan=2></td><td class="subtext"><span id=score_5668374>466 points</span> by <a href="user?id=jgv">jgv</a> 1 day ago | <a href="item?id=5668374">61 comments</a></td></tr><tr style="height:5px"></tr><tr style="height:10px"></tr><tr><td colspan=2></td><td class="title"><a href="news2">More</a></td></tr></table></td></tr><tr><td><img src="s.gif" height=10 width=0><table width="100%" cellspacing=0 cellpadding=1><tr><td bgcolor=#ff6600></td></tr></table><br>
|
22
|
+
<center><span class="yclinks"><a href="lists">Lists</a> | <a href="rss">RSS</a> | <a href="http://ycombinator.com/bookmarklet.html">Bookmarklet</a> | <a href="http://ycombinator.com/newsguidelines.html">Guidelines</a> | <a href="http://ycombinator.com/newsfaq.html">FAQ</a> | <a href="dmca.html">DMCA</a> | <a href="http://ycombinator.com/newsnews.html">News News</a> | <a href="item?id=363">Feature Requests</a> | <a href="http://ycombinator.com">Y Combinator</a> | <a href="http://ycombinator.com/apply.html">Apply</a> | <a href="http://ycombinator.com/lib.html">Library</a></span><br><br>
|
23
|
+
<form method=get action="//www.hnsearch.com/search#request/all">Search: <input type=text name="q" value="" size=17></form><br>
|
24
|
+
</center></td></tr></table></center></body></html>
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'pry'
|
3
|
+
|
4
|
+
# Exercises each RubyTikaApp output format against the fixture documents in
# spec/docs, and checks that the same fixtures can be fetched over HTTP from
# the local test server on port 9299.
describe RubyTikaApp do

  # Fixture locations, resolved relative to this spec file.
  let(:fixtures_dir)   { "#{File.dirname(__FILE__)}/docs" }
  let(:pdf_fixture)    { "#{fixtures_dir}/graph sampling simplex - 11.pdf" }
  let(:cnn_fixture)    { "#{fixtures_dir}/cnn.com" }
  let(:hn_fixture)     { "#{fixtures_dir}/news.ycombinator.com" }

  describe 'Error' do
    it 'has an error' do
      # A path that does not exist must surface as an exception.
      expect do
        missing = RubyTikaApp.new('No file')
        missing.to_xml
      end.to raise_error
    end
  end

  describe '#to_xml' do
    it 'header' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_xml[0..37].should == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
    end

    it 'middle' do
      output = RubyTikaApp.new(pdf_fixture).to_xml

      # Sample a window starting at the midpoint of the document.
      midpoint = output.size / 2

      output[midpoint..(midpoint + 100)].should == "Frontier Sampling (FS).\nSince this is the only difference between MHRW and USDSG,\nto be simple, we wi"
    end
  end

  describe '#to_html' do
    it 'header' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_html[0..42].should == "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
    end

    it 'middle' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_html[10000...10100].should == "g a user’s profile is\nmuch more time-consuming compared with the calculation to\nchoose the nex"
    end
  end

  describe '#to_json' do
    it 'header' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_json[0..42].should == "{\"Application\":\"\\u0027Certified by IEEE PDF"
    end

    it 'middle' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_json[100...150].should == "\"171510\",\"Content-Type\":\"application/pdf\",\"Creatio"
    end
  end

  describe '#to_text' do
    it 'header' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_text[0..42].should == "Understanding Graph Sampling Algorithms\nfor"
    end

    it 'middle' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_text[100...150].should == "n Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixin"
    end
  end

  describe '#to_text_main' do
    it 'header' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_text_main[0..42].should == 'Understanding Graph Sampling Algorithms for'
    end

    it 'middle' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_text_main[100...150].should == "n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing"
    end
  end

  describe '#to_metadata' do
    it 'header' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_metadata[0..42].should == "Application: 'Certified by IEEE PDFeXpress "
    end

    it 'middle' do
      extractor = RubyTikaApp.new(pdf_fixture)
      extractor.to_metadata[100...150].should == "Type: application/pdf\nCreation-Date: 2011-03-29T12"
    end
  end

  describe 'external URLs' do
    # Text extracted through the HTTP server must match text extracted
    # directly from the same fixture on disk.
    it 'should be able to parse an http url' do
      remote = RubyTikaApp.new('http://localhost:9299/cnn.com')
      remote.to_text.should_not be_nil
      remote.to_text.should eq(RubyTikaApp.new(cnn_fixture).to_text)
    end

    it 'should be able to parse another http url' do
      remote = RubyTikaApp.new('http://localhost:9299/news.ycombinator.com')
      remote.to_text.should_not be_nil
      remote.to_text.should eq(RubyTikaApp.new(hn_fixture).to_text)
    end
  end

end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'simplecov'
|
2
|
+
SimpleCov.start
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
require 'bundler/setup'
|
6
|
+
|
7
|
+
require 'ruby_tika_app'
|
8
|
+
require 'rspec'
|
9
|
+
|
10
|
+
# Include all files under spec/support. The glob is anchored to this file
# rather than the current working directory so the suite can be launched from
# anywhere; sorting keeps the load order deterministic.
Dir[File.expand_path('support/**/*.rb', File.dirname(__FILE__))].sort.each { |f| require f }

# Start a local rack server to serve up test pages.
@server_thread = Thread.new do
  Rack::Handler::Thin.run MyApp::Test::Server.new, :Port => 9299
end

# Wait until the server actually accepts connections instead of sleeping for a
# fixed interval: a fixed sleep is flaky on slow machines and silently hides a
# server that failed to boot. Gives up (re-raising the connection error) after
# roughly ten seconds.
require 'socket'

server_deadline = Time.now + 10
begin
  TCPSocket.new('localhost', 9299).close
rescue SystemCallError
  raise if Time.now > server_deadline
  sleep(0.1)
  retry
end
|
19
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rack'
|
3
|
+
|
4
|
+
module MyApp
  module Test
    # Minimal Rack application that serves the fixture documents stored in
    # the sibling `docs` directory (one level up from this file).
    class Server
      # Absolute, normalised path of the directory fixtures are served from.
      DOCS_ROOT = File.expand_path('../docs', File.dirname(__FILE__))

      # Rack entry point.
      #
      # env - the Rack environment; only 'PATH_INFO' is read.
      #
      # Returns a Rack response triplet [status, headers, body]:
      #   * 200 with 'text/html' and the file contents when the requested
      #     path names a regular file inside DOCS_ROOT;
      #   * 404 with 'text/plain' otherwise.
      def call(env)
        path = Rack::Utils.unescape(env['PATH_INFO'])
        path += 'index.html' if path == '/'

        # Normalise before checking so '..' segments cannot escape DOCS_ROOT.
        file = File.expand_path(File.join(DOCS_ROOT, path))
        inside_root = file.start_with?(DOCS_ROOT + File::SEPARATOR)

        # File.file? (not the removed File.exists?) also rejects directories,
        # which would otherwise make File.read raise.
        if inside_root && File.file?(file)
          # Rack bodies must respond to #each, hence the Array wrappers.
          [ 200, {'Content-Type' => 'text/html'}, [File.read(file)] ]
        else
          [ 404, {'Content-Type' => 'text/plain'}, ['file not found'] ]
        end
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,206 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sw_ruby_tika_app
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.14.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Parker
|
8
|
+
- Ilya Bazylchuk
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2017-03-27 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: open4
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: rspec
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 2.13.0
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 2.13.0
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: bundler
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 1.0.15
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.0.15
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: simplecov
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: json
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: rack
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
type: :development
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - ">="
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
- !ruby/object:Gem::Dependency
|
113
|
+
name: thin
|
114
|
+
requirement: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
119
|
+
type: :development
|
120
|
+
prerelease: false
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: pry
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
requirements:
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '0'
|
133
|
+
type: :development
|
134
|
+
prerelease: false
|
135
|
+
version_requirements: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - ">="
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
140
|
+
- !ruby/object:Gem::Dependency
|
141
|
+
name: rb-readline
|
142
|
+
requirement: !ruby/object:Gem::Requirement
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
version: '0'
|
147
|
+
type: :development
|
148
|
+
prerelease: false
|
149
|
+
version_requirements: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - ">="
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
description: Wrapper around the tika-app jar
|
155
|
+
email:
|
156
|
+
- ilya.bazylchuk@startdatelabs.com
|
157
|
+
executables: []
|
158
|
+
extensions: []
|
159
|
+
extra_rdoc_files: []
|
160
|
+
files:
|
161
|
+
- ".gitignore"
|
162
|
+
- ".rspec"
|
163
|
+
- Gemfile
|
164
|
+
- HISTORY
|
165
|
+
- LICENSE
|
166
|
+
- README.md
|
167
|
+
- Rakefile
|
168
|
+
- ext/tika-app-1.14.jar
|
169
|
+
- lib/ruby_tika_app.rb
|
170
|
+
- ruby_tika_app.gemspec
|
171
|
+
- spec/docs/cnn.com
|
172
|
+
- spec/docs/graph sampling simplex - 11.pdf
|
173
|
+
- spec/docs/news.ycombinator.com
|
174
|
+
- spec/ruby_tika_app_spec.rb
|
175
|
+
- spec/spec_helper.rb
|
176
|
+
- spec/support/test_server.rb
|
177
|
+
homepage: https://github.com/startdatelabs/ruby_tika_app
|
178
|
+
licenses: []
|
179
|
+
metadata: {}
|
180
|
+
post_install_message:
|
181
|
+
rdoc_options: []
|
182
|
+
require_paths:
|
183
|
+
- lib
|
184
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
185
|
+
requirements:
|
186
|
+
- - ">="
|
187
|
+
- !ruby/object:Gem::Version
|
188
|
+
version: '0'
|
189
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
190
|
+
requirements:
|
191
|
+
- - ">="
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0'
|
194
|
+
requirements: []
|
195
|
+
rubyforge_project:
|
196
|
+
rubygems_version: 2.5.1
|
197
|
+
signing_key:
|
198
|
+
specification_version: 4
|
199
|
+
summary: Wrapper around the tika-app jar
|
200
|
+
test_files:
|
201
|
+
- spec/docs/cnn.com
|
202
|
+
- spec/docs/graph sampling simplex - 11.pdf
|
203
|
+
- spec/docs/news.ycombinator.com
|
204
|
+
- spec/ruby_tika_app_spec.rb
|
205
|
+
- spec/spec_helper.rb
|
206
|
+
- spec/support/test_server.rb
|