ruby_tika_app_lambda 1.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/Gemfile +6 -0
- data/HISTORY +13 -0
- data/LICENSE +20 -0
- data/README.md +54 -0
- data/Rakefile +3 -0
- data/ext/tika-config.xml +13 -0
- data/lib/ruby_tika_app.rb +79 -0
- data/ruby_tika_app.gemspec +31 -0
- data/spec/docs/cnn.com +1473 -0
- data/spec/docs/graph sampling simplex - 11.pdf +0 -0
- data/spec/docs/news.ycombinator.com +24 -0
- data/spec/ruby_tika_app_spec.rb +122 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/support/test_server.rb +23 -0
- metadata +192 -0
Binary file
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<html><head><link rel="stylesheet" type="text/css" href="news.css">
|
2
|
+
<link rel="shortcut icon" href="favicon.ico">
|
3
|
+
<script type="text/javascript">
|
4
|
+
function byId(id) {
|
5
|
+
return document.getElementById(id);
|
6
|
+
}
|
7
|
+
|
8
|
+
function vote(node) {
|
9
|
+
var v = node.id.split(/_/); // {'up', '123'}
|
10
|
+
var item = v[1];
|
11
|
+
|
12
|
+
// hide arrows
|
13
|
+
byId('up_' + item).style.visibility = 'hidden';
|
14
|
+
byId('down_' + item).style.visibility = 'hidden';
|
15
|
+
|
16
|
+
// ping server
|
17
|
+
var ping = new Image();
|
18
|
+
ping.src = node.href;
|
19
|
+
|
20
|
+
return false; // cancel browser nav
|
21
|
+
} </script><title>Hacker News</title></head><body><center><table border=0 cellpadding=0 cellspacing=0 width="85%" bgcolor=#f6f6ef><tr><td bgcolor=#ff6600><table border=0 cellpadding=0 cellspacing=0 width="100%" style="padding:2px"><tr><td style="width:18px;padding-right:4px"><a href="http://ycombinator.com"><img src="y18.gif" width=18 height=18 style="border:1px #ffffff solid;"></img></a></td><td style="line-height:12pt; height:10px;"><span class="pagetop"><b><a href="news">Hacker News</a></b><img src="s.gif" height=1 width=10><a href="newest">new</a> | <a href="newcomments">comments</a> | <a href="ask">ask</a> | <a href="jobs">jobs</a> | <a href="submit">submit</a></span></td><td style="text-align:right;padding-right:4px;"><span class="pagetop"><a href="newslogin?whence=%6e%65%77%73">login</a></span></td></tr></table></td></tr><tr style="height:10px"></tr><tr><td><table border=0 cellpadding=0 cellspacing=0><tr><td align=right valign=top class="title">1.</td><td><center><a id=up_5674230 href="vote?for=5674230&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674230></span></center></td><td class="title"><a href="http://www.motherjones.com/tom-philpott/2013/05/7-dodgy-foodag-practices-banned-europe-just-fine-here">Food Practices Banned in Europe But Allowed in the US</a><span class="comhead"> (motherjones.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674230>117 points</span> by <a href="user?id=casca">casca</a> 2 hours ago | <a href="item?id=5674230">70 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">2.</td><td><center><a id=up_5674193 href="vote?for=5674193&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674193></span></center></td><td class="title"><a href="http://news.cnet.com/8301-13578_3-57583395-38/doj-we-dont-need-warrants-for-e-mail-facebook-chats/">DOJ: We don't need warrants 
for e-mail, Facebook chats</a><span class="comhead"> (cnet.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674193>46 points</span> by <a href="user?id=declan">declan</a> 2 hours ago | <a href="item?id=5674193">22 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">3.</td><td><center><a id=up_5673628 href="vote?for=5673628&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673628></span></center></td><td class="title"><a href="http://peternixey.com/post/49928526270/dear-apple-lets-talk-about-photos">Dear Apple, let's talk about photos</a><span class="comhead"> (peternixey.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673628>206 points</span> by <a href="user?id=robheaton">robheaton</a> 4 hours ago | <a href="item?id=5673628">194 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">4.</td><td><center><a id=up_5674380 href="vote?for=5674380&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674380></span></center></td><td class="title"><a href="http://37signals.com/reportcard">37signals report card</a><span class="comhead"> (37signals.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674380>38 points</span> by <a href="user?id=wlll">wlll</a> 1 hour ago | <a href="item?id=5674380">10 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">5.</td><td><center><a id=up_5673339 href="vote?for=5673339&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673339></span></center></td><td class="title"><a href="https://planscope.io/blog/the-freelancers-guide-to-recurring-revenue/">The Freelancer's Guide to Recurring Revenue</a><span class="comhead"> (planscope.io) </span></td></tr><tr><td colspan=2></td><td 
class="subtext"><span id=score_5673339>173 points</span> by <a href="user?id=sherm8n">sherm8n</a> 6 hours ago | <a href="item?id=5673339">54 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">6.</td><td><center><a id=up_5674510 href="vote?for=5674510&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674510></span></center></td><td class="title"><a href="https://github.com/mDialog/scala-zeromq">scala-zeromq - thread-safe ZeroMQ sockets for Scala</a><span class="comhead"> (github.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674510>24 points</span> by <a href="user?id=ninjakeyboard">ninjakeyboard</a> 1 hour ago | <a href="item?id=5674510">1 comment</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">7.</td><td><center><a id=up_5674438 href="vote?for=5674438&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674438></span></center></td><td class="title"><a href="http://www.madinamerica.com/2012/02/why-anti-authoritarians-are-diagnosed-as-mentally-ill/">Why Anti-Authoritarians are Diagnosed as Mentally Ill (2012)</a><span class="comhead"> (madinamerica.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674438>24 points</span> by <a href="user?id=yesbabyyes">yesbabyyes</a> 1 hour ago | <a href="item?id=5674438">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">8.</td><td><center><a id=up_5673948 href="vote?for=5673948&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673948></span></center></td><td class="title"><a href="http://www.maths.manchester.ac.uk/~jm/Choreographies/">Planar Choreographies: odd orbital mechanics</a><span class="comhead"> (manchester.ac.uk) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span 
id=score_5673948>51 points</span> by <a href="user?id=ColinWright">ColinWright</a> 3 hours ago | <a href="item?id=5673948">13 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">9.</td><td><center><a id=up_5674838 href="vote?for=5674838&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674838></span></center></td><td class="title"><a href="http://baohaojun.github.io/blog/2013/05/08/stripping-kernel-code-for-reading.html">Stripping kernel/uboot source to 10% for code reading</a><span class="comhead"> (baohaojun.github.io) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674838>10 points</span> by <a href="user?id=baohaojun">baohaojun</a> 51 minutes ago | <a href="item?id=5674838">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">10.</td><td><center><a id=up_5673356 href="vote?for=5673356&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673356></span></center></td><td class="title"><a href="http://blog.bitops.com/blog/2013/05/01/unreal-javascript/">Unreal JavaScript</a><span class="comhead"> (bitops.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673356>100 points</span> by <a href="user?id=ndr">ndr</a> 6 hours ago | <a href="item?id=5673356">49 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">11.</td><td><center><a id=up_5673544 href="vote?for=5673544&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673544></span></center></td><td class="title"><a href="http://sidekicksrc.com/post/if-you-love-automation-why-is-your-development-environment-manual/">If you love automation, why is your development environment manual?</a><span class="comhead"> (sidekicksrc.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span 
id=score_5673544>76 points</span> by <a href="user?id=timruffles">timruffles</a> 5 hours ago | <a href="item?id=5673544">63 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">12.</td><td><center><a id=up_5674279 href="vote?for=5674279&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674279></span></center></td><td class="title"><a href="http://www.mobilesort.com/blog/ios-sale-numbers-by-app-store-rank.html">iOS Sale Numbers By App Store Rank</a><span class="comhead"> (mobilesort.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674279>17 points</span> by <a href="user?id=chrisa">chrisa</a> 2 hours ago | <a href="item?id=5674279">7 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">13.</td><td><center><a id=up_5674150 href="vote?for=5674150&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674150></span></center></td><td class="title"><a href="http://minimaxir.com/2013/05/stones-of-jordan/">Diablo III Economy Broken by an Integer Overflow Bug</a><span class="comhead"> (minimaxir.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674150>58 points</span> by <a href="user?id=minimaxir">minimaxir</a> 2 hours ago | <a href="item?id=5674150">61 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">14.</td><td><center><a id=up_5670719 href="vote?for=5670719&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5670719></span></center></td><td class="title"><a href="https://www.eff.org/deeplinks/2013/05/why-isnt-gatsby-public-domain">Why Isn't Gatsby in the Public Domain?</a><span class="comhead"> (eff.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5670719>526 points</span> by <a 
href="user?id=ninthfrank07">ninthfrank07</a> 19 hours ago | <a href="item?id=5670719">159 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">15.</td><td><center><a id=up_5672515 href="vote?for=5672515&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672515></span></center></td><td class="title"><a href="http://www.flattestroute.com/">Show HN: Flattest Route - A web app to help you avoid hills in SF</a><span class="comhead"> (flattestroute.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672515>200 points</span> by <a href="user?id=jonny_eh">jonny_eh</a> 11 hours ago | <a href="item?id=5672515">67 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">16.</td><td><center><a id=up_5674263 href="vote?for=5674263&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674263></span></center></td><td class="title"><a href="http://www.pewsocialtrends.org/2013/05/07/gun-homicide-rate-down-49-since-1993-peak-public-unaware/">Gun Homicide Rate Down 49% Since 1993 Peak; Public Unaware</a><span class="comhead"> (pewsocialtrends.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674263>141 points</span> by <a href="user?id=krg">krg</a> 2 hours ago | <a href="item?id=5674263">173 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">17.</td><td><center><a id=up_5674962 href="vote?for=5674962&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674962></span></center></td><td class="title"><a href="http://weathersignal.com/" rel="nofollow">Pocket Meteorology: Using Android Phones to Crowdsource the Weather</a><span class="comhead"> (weathersignal.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674962>5 points</span> by <a 
href="user?id=exap">exap</a> 30 minutes ago | <a href="item?id=5674962">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">18.</td><td><center><a id=up_5675038 href="vote?for=5675038&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5675038></span></center></td><td class="title"><a href="http://www.allanberger.com/post/49938151321/how-to-make-your-apps-retina-ready" rel="nofollow">How to make your Apps Retina ready</a><span class="comhead"> (allanberger.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5675038>4 points</span> by <a href="user?id=andreasklinger">andreasklinger</a> 16 minutes ago | <a href="item?id=5675038">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">19.</td><td><center><a id=up_5673480 href="vote?for=5673480&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673480></span></center></td><td class="title"><a href="http://thecodeartist.blogspot.com/2013/05/sensors-on-google-glass.html">Sensors on Google Glass</a><span class="comhead"> (thecodeartist.blogspot.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673480>44 points</span> by <a href="user?id=cvs268">cvs268</a> 5 hours ago | <a href="item?id=5673480">17 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">20.</td><td><center><a id=up_5673032 href="vote?for=5673032&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673032></span></center></td><td class="title"><a href="https://github.com/d11wtq/boris">Boris: A tiny but robust REPL for PHP</a><span class="comhead"> (github.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673032>90 points</span> by <a href="user?id=nodesocket">nodesocket</a> 8 hours ago | <a 
href="item?id=5673032">36 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">21.</td><td><center><a id=up_5674755 href="vote?for=5674755&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674755></span></center></td><td class="title"><a href="http://www.washingtonpost.com/local/education/free-online-textbooks-with-conditions/2013/05/07/b49364ce-b761-11e2-92f3-f291801936b8_story.html?hpid=z9">Coursera to offer students free online textbooks, with conditions</a><span class="comhead"> (washingtonpost.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674755>6 points</span> by <a href="user?id=sonabinu">sonabinu</a> 1 hour ago | <a href="item?id=5674755">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">22.</td><td></td><td class="title"><a href="item?id=5674229">Join us and disrupt one of the world's worst monopolies</a></td></tr><tr><td colspan=2></td><td class="subtext">2 hours ago</td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">23.</td><td><center><a id=up_5673463 href="vote?for=5673463&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673463></span></center></td><td class="title"><a href="http://phoboslab.org/log/2013/05/mpeg1-video-decoder-in-javascript">MPEG1 Video Decoder in JavaScript</a><span class="comhead"> (phoboslab.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673463>39 points</span> by <a href="user?id=phoboslab">phoboslab</a> 5 hours ago | <a href="item?id=5673463">11 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">24.</td><td><center><a id=up_5671652 href="vote?for=5671652&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5671652></span></center></td><td class="title"><a 
href="https://twitter.com/id_aa_carmack/status/331918309916295168">John Carmack starting port of Wolf 3D in Haskell</a><span class="comhead"> (twitter.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5671652>274 points</span> by <a href="user?id=bobfunk">bobfunk</a> 16 hours ago | <a href="item?id=5671652">108 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">25.</td><td><center><a id=up_5674417 href="vote?for=5674417&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674417></span></center></td><td class="title"><a href="http://www.d2.vu/">Demonoid returns, thanks to community hosting.</a><span class="comhead"> (d2.vu) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674417>7 points</span> by <a href="user?id=bichiliad">bichiliad</a> 1 hour ago | <a href="item?id=5674417">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">26.</td><td><center><a id=up_5672875 href="vote?for=5672875&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672875></span></center></td><td class="title"><a href="http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html">A regular expression matcher By Rob Pike and Brian Kernighan (2007)</a><span class="comhead"> (princeton.edu) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672875>78 points</span> by <a href="user?id=sid6376">sid6376</a> 9 hours ago | <a href="item?id=5672875">34 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">27.</td><td><center><a id=up_5672354 href="vote?for=5672354&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672354></span></center></td><td class="title"><a href="http://nemaload.davidad.org/">Show HN: My friend's project to simulate an 
entire C. Elegans</a><span class="comhead"> (davidad.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672354>127 points</span> by <a href="user?id=SlyShy">SlyShy</a> 12 hours ago | <a href="item?id=5672354">41 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">28.</td><td><center><a id=up_5674550 href="vote?for=5674550&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674550></span></center></td><td class="title"><a href="http://blog.mortardata.com/post/49934459499/recommender-systems-for-free">How to get Hilary Mason to build your recommender for free</a><span class="comhead"> (mortardata.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674550>34 points</span> by <a href="user?id=kky">kky</a> 1 hour ago | <a href="item?id=5674550">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">29.</td><td><center><a id=up_5674434 href="vote?for=5674434&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674434></span></center></td><td class="title"><a href="http://mailman.ravenbrook.com/pipermail/mps-discussion/2013-May/000128.html">First release of MPS GC with full documentation (1.111.0)</a><span class="comhead"> (ravenbrook.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674434>6 points</span> by <a href="user?id=BruceM">BruceM</a> 1 hour ago | <a href="item?id=5674434">1 comment</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">30.</td><td><center><a id=up_5668374 href="vote?for=5668374&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5668374></span></center></td><td class="title"><a href="http://jonobr1.github.io/two.js">Two.js</a><span class="comhead"> (jonobr1.github.io) </span></td></tr><tr><td 
colspan=2></td><td class="subtext"><span id=score_5668374>466 points</span> by <a href="user?id=jgv">jgv</a> 1 day ago | <a href="item?id=5668374">61 comments</a></td></tr><tr style="height:5px"></tr><tr style="height:10px"></tr><tr><td colspan=2></td><td class="title"><a href="news2">More</a></td></tr></table></td></tr><tr><td><img src="s.gif" height=10 width=0><table width="100%" cellspacing=0 cellpadding=1><tr><td bgcolor=#ff6600></td></tr></table><br>
|
22
|
+
<center><span class="yclinks"><a href="lists">Lists</a> | <a href="rss">RSS</a> | <a href="http://ycombinator.com/bookmarklet.html">Bookmarklet</a> | <a href="http://ycombinator.com/newsguidelines.html">Guidelines</a> | <a href="http://ycombinator.com/newsfaq.html">FAQ</a> | <a href="dmca.html">DMCA</a> | <a href="http://ycombinator.com/newsnews.html">News News</a> | <a href="item?id=363">Feature Requests</a> | <a href="http://ycombinator.com">Y Combinator</a> | <a href="http://ycombinator.com/apply.html">Apply</a> | <a href="http://ycombinator.com/lib.html">Library</a></span><br><br>
|
23
|
+
<form method=get action="//www.hnsearch.com/search#request/all">Search: <input type=text name="q" value="" size=17></form><br>
|
24
|
+
</center></td></tr></table></center></body></html>
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Specs for the RubyTikaApp wrapper: error reporting, each output format
# (XML, HTML, JSON, plain text, main-content text, metadata) checked against
# fixed slices of the bundled PDF fixture, and fetching documents over HTTP
# from the local test server on port 9299.
describe RubyTikaApp do
  # Fixture locations, resolved relative to this spec file.
  let(:fixtures_dir) { "#{File.dirname(__FILE__)}/docs" }
  let(:pdf_fixture) { "#{fixtures_dir}/graph sampling simplex - 11.pdf" }
  let(:cnn_fixture) { "#{fixtures_dir}/cnn.com" }
  let(:hn_fixture) { "#{fixtures_dir}/news.ycombinator.com" }

  # Builds a fresh wrapper for the given document (the PDF fixture by default).
  def tika_for(document = pdf_fixture)
    RubyTikaApp.new(document)
  end

  describe 'Error' do
    it 'has an error' do
      expect { tika_for('No file').to_xml }.to raise_error(RuntimeError)
    end
  end

  describe 'CommandFailedError' do
    it 'is raised correctly' do
      expect { tika_for('/file_not_found.pdf').to_text }.to raise_error(RubyTikaApp::CommandFailedError)
    end
  end

  describe '#to_xml' do
    it 'header' do
      prolog = tika_for.to_xml[0..37]
      expect(prolog).to eq('<?xml version="1.0" encoding="UTF-8"?>')
    end

    it 'middle' do
      document = tika_for.to_xml
      midpoint = document.size / 2

      # 101 characters starting at the midpoint of the output.
      excerpt = document[midpoint..(midpoint + 100)]
      expect(excerpt).to eq("pply USDSG, we\nneed to change a directed graph Gd to a symmetric graph\nG. This methodology is also us")
    end
  end

  describe '#to_html' do
    it 'header' do
      opening = tika_for.to_html[0..42]
      expect(opening).to eq('<html xmlns="http://www.w3.org/1999/xhtml">')
    end

    it 'middle' do
      excerpt = tika_for.to_html[1000...1100]
      expect(excerpt).to eq("nfo:modified\" content=\"2011-03-29T13:00:16Z\"/>\n<meta name=\"meta:save-date\" content=\"2011-03-29T13:00")
    end
  end

  describe '#to_json' do
    it 'header' do
      opening = tika_for.to_json[0..42]
      expect(opening).to eq('{"Application":"\\u0027Certified by IEEE PDF')
    end

    it 'middle' do
      excerpt = tika_for.to_json[100...150]
      expect(excerpt).to eq('"171510","Content-Type":"application/pdf","Creatio')
    end
  end

  describe '#to_text' do
    it 'header' do
      opening = tika_for.to_text[0..42]
      expect(opening).to eq("Understanding Graph Sampling Algorithms\nfor")
    end

    it 'middle' do
      excerpt = tika_for.to_text[100...150]
      expect(excerpt).to eq("in Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixi")
    end
  end

  describe '#to_text_main' do
    it 'header' do
      opening = tika_for.to_text_main[0..42]
      expect(opening).to eq('Understanding Graph Sampling Algorithms for')
    end

    it 'middle' do
      excerpt = tika_for.to_text_main[100...150]
      expect(excerpt).to eq("n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing")
    end
  end

  describe '#to_metadata' do
    it 'header' do
      opening = tika_for.to_metadata[0..42]
      expect(opening).to eq("Application: 'Certified by IEEE PDFeXpress ")
    end

    it 'middle' do
      excerpt = tika_for.to_metadata[100...150]
      expect(excerpt).to eq("Type: application/pdf\nCreation-Date: 2011-03-29T12")
    end
  end

  describe 'external URLs' do
    it 'should be able to parse an http url' do
      remote = tika_for('http://localhost:9299/cnn.com')

      # The served copy must extract to the same text as the on-disk fixture.
      expect(remote.to_text).not_to be_nil
      expect(remote.to_text).to eq(tika_for(cnn_fixture).to_text)
    end

    it 'should be able to parse another http url' do
      remote = tika_for('http://localhost:9299/news.ycombinator.com')

      expect(remote.to_text).not_to be_nil
      expect(remote.to_text).to eq(tika_for(hn_fixture).to_text)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true

# Spec bootstrap: starts coverage, loads the library and support files, and
# boots a local HTTP server that serves the fixture documents to the specs.

# SimpleCov is started before the library is required.
require 'simplecov'
SimpleCov.start

require 'rubygems'
require 'bundler/setup'

require 'ruby_tika_app'
require 'rspec'
require 'socket'

# Include all files under spec/support
Dir['./spec/support/**/*.rb'].sort.each { |f| require f }

# Start a local rack server to serve up test pages.
@server_thread = Thread.new do
  Rack::Handler::Thin.run(MyApp::Test::Server.new, Port: 9299, Host: '127.0.0.1')
end

# Wait until the server actually accepts connections instead of sleeping for
# a fixed interval: a fixed sleep is too short on a slow machine and hides a
# server that failed to start.
boot_deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + 10
loop do
  begin
    TCPSocket.new('127.0.0.1', 9299).close
    break
  rescue SystemCallError
    # Connection refused (or similar): the listener is not up yet.
    raise 'test server thread exited before accepting connections on 127.0.0.1:9299' unless @server_thread.alive?
    if Process.clock_gettime(Process::CLOCK_MONOTONIC) > boot_deadline
      raise 'test server did not start listening on 127.0.0.1:9299 within 10 seconds'
    end

    sleep(0.05)
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rack'
|
5
|
+
|
6
|
+
module MyApp
  module Test
    # Minimal Rack application used by the specs to serve the fixture
    # documents in spec/docs over HTTP.
    class Server
      # Rack entry point.
      #
      # Reads only env['PATH_INFO'], URL-unescapes it, and maps it onto a file
      # under the docs directory next to this support directory. A request
      # for '/' is mapped to 'index.html'.
      #
      # @param env [Hash] Rack environment.
      # @return [Array] Rack response triple [status, headers, body]:
      #   200 with the file contents as 'text/html' when the path names a
      #   regular file inside the docs directory, otherwise 404 with the
      #   plain-text body 'file not found'. The body is an Array of Strings
      #   because the Rack SPEC requires a body that responds to #each.
      def call(env)
        @root ||= File.expand_path('../docs', __dir__)
        path = Rack::Utils.unescape(env['PATH_INFO'].to_s)
        path += 'index.html' if path == '/'

        # Normalise the path so '..' segments cannot escape the docs directory.
        file = File.expand_path(File.join(@root, path))
        inside_root = file.start_with?(@root + File::SEPARATOR)

        # File.file? (not File.exist?) so a directory is answered with 404
        # instead of File.read raising Errno::EISDIR.
        if inside_root && File.file?(file)
          [200, { 'Content-Type' => 'text/html' }, [File.read(file)]]
        else
          [404, { 'Content-Type' => 'text/plain' }, ['file not found']]
        end
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,192 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby_tika_app_lambda
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.25.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Parker
|
8
|
+
- Eric Musgrove
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2024-12-01 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: open4
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: bundler
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: 1.0.15
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: 1.0.15
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: json
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: pry
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rack
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: rake
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: rspec
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - "~>"
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: 3.9.0
|
105
|
+
type: :development
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - "~>"
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: 3.9.0
|
112
|
+
- !ruby/object:Gem::Dependency
|
113
|
+
name: simplecov
|
114
|
+
requirement: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
119
|
+
type: :development
|
120
|
+
prerelease: false
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: thin
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
requirements:
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '0'
|
133
|
+
type: :development
|
134
|
+
prerelease: false
|
135
|
+
version_requirements: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - ">="
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
140
|
+
description: Wrapper around the tika-app jar
|
141
|
+
email:
|
142
|
+
- mrcsparker@gmail.com
|
143
|
+
- eric.musgrove@stoatlabs.com
|
144
|
+
executables: []
|
145
|
+
extensions: []
|
146
|
+
extra_rdoc_files: []
|
147
|
+
files:
|
148
|
+
- ".gitignore"
|
149
|
+
- ".rspec"
|
150
|
+
- Gemfile
|
151
|
+
- HISTORY
|
152
|
+
- LICENSE
|
153
|
+
- README.md
|
154
|
+
- Rakefile
|
155
|
+
- ext/tika-config.xml
|
156
|
+
- lib/ruby_tika_app.rb
|
157
|
+
- ruby_tika_app.gemspec
|
158
|
+
- spec/docs/cnn.com
|
159
|
+
- spec/docs/graph sampling simplex - 11.pdf
|
160
|
+
- spec/docs/news.ycombinator.com
|
161
|
+
- spec/ruby_tika_app_spec.rb
|
162
|
+
- spec/spec_helper.rb
|
163
|
+
- spec/support/test_server.rb
|
164
|
+
homepage: https://github.com/StoatLabs/ruby_tika_app
|
165
|
+
licenses: []
|
166
|
+
metadata: {}
|
167
|
+
post_install_message:
|
168
|
+
rdoc_options: []
|
169
|
+
require_paths:
|
170
|
+
- lib
|
171
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
172
|
+
requirements:
|
173
|
+
- - ">="
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
181
|
+
requirements: []
|
182
|
+
rubygems_version: 3.5.22
|
183
|
+
signing_key:
|
184
|
+
specification_version: 4
|
185
|
+
summary: Wrapper around the tika-app jar
|
186
|
+
test_files:
|
187
|
+
- spec/docs/cnn.com
|
188
|
+
- spec/docs/graph sampling simplex - 11.pdf
|
189
|
+
- spec/docs/news.ycombinator.com
|
190
|
+
- spec/ruby_tika_app_spec.rb
|
191
|
+
- spec/spec_helper.rb
|
192
|
+
- spec/support/test_server.rb
|