ruby_tika_app_lambda 1.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ <html><head><link rel="stylesheet" type="text/css" href="news.css">
2
+ <link rel="shortcut icon" href="favicon.ico">
3
+ <script type="text/javascript">
4
+ function byId(id) {
5
+ return document.getElementById(id);
6
+ }
7
+
8
+ function vote(node) {
9
+ var v = node.id.split(/_/); // {'up', '123'}
10
+ var item = v[1];
11
+
12
+ // hide arrows
13
+ byId('up_' + item).style.visibility = 'hidden';
14
+ byId('down_' + item).style.visibility = 'hidden';
15
+
16
+ // ping server
17
+ var ping = new Image();
18
+ ping.src = node.href;
19
+
20
+ return false; // cancel browser nav
21
+ } </script><title>Hacker News</title></head><body><center><table border=0 cellpadding=0 cellspacing=0 width="85%" bgcolor=#f6f6ef><tr><td bgcolor=#ff6600><table border=0 cellpadding=0 cellspacing=0 width="100%" style="padding:2px"><tr><td style="width:18px;padding-right:4px"><a href="http://ycombinator.com"><img src="y18.gif" width=18 height=18 style="border:1px #ffffff solid;"></img></a></td><td style="line-height:12pt; height:10px;"><span class="pagetop"><b><a href="news">Hacker News</a></b><img src="s.gif" height=1 width=10><a href="newest">new</a> | <a href="newcomments">comments</a> | <a href="ask">ask</a> | <a href="jobs">jobs</a> | <a href="submit">submit</a></span></td><td style="text-align:right;padding-right:4px;"><span class="pagetop"><a href="newslogin?whence=%6e%65%77%73">login</a></span></td></tr></table></td></tr><tr style="height:10px"></tr><tr><td><table border=0 cellpadding=0 cellspacing=0><tr><td align=right valign=top class="title">1.</td><td><center><a id=up_5674230 href="vote?for=5674230&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674230></span></center></td><td class="title"><a href="http://www.motherjones.com/tom-philpott/2013/05/7-dodgy-foodag-practices-banned-europe-just-fine-here">Food Practices Banned in Europe But Allowed in the US</a><span class="comhead"> (motherjones.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674230>117 points</span> by <a href="user?id=casca">casca</a> 2 hours ago | <a href="item?id=5674230">70 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">2.</td><td><center><a id=up_5674193 href="vote?for=5674193&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674193></span></center></td><td class="title"><a href="http://news.cnet.com/8301-13578_3-57583395-38/doj-we-dont-need-warrants-for-e-mail-facebook-chats/">DOJ: We don't need warrants 
for e-mail, Facebook chats</a><span class="comhead"> (cnet.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674193>46 points</span> by <a href="user?id=declan">declan</a> 2 hours ago | <a href="item?id=5674193">22 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">3.</td><td><center><a id=up_5673628 href="vote?for=5673628&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673628></span></center></td><td class="title"><a href="http://peternixey.com/post/49928526270/dear-apple-lets-talk-about-photos">Dear Apple, let's talk about photos</a><span class="comhead"> (peternixey.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673628>206 points</span> by <a href="user?id=robheaton">robheaton</a> 4 hours ago | <a href="item?id=5673628">194 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">4.</td><td><center><a id=up_5674380 href="vote?for=5674380&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674380></span></center></td><td class="title"><a href="http://37signals.com/reportcard">37signals report card</a><span class="comhead"> (37signals.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674380>38 points</span> by <a href="user?id=wlll">wlll</a> 1 hour ago | <a href="item?id=5674380">10 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">5.</td><td><center><a id=up_5673339 href="vote?for=5673339&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673339></span></center></td><td class="title"><a href="https://planscope.io/blog/the-freelancers-guide-to-recurring-revenue/">The Freelancer's Guide to Recurring Revenue</a><span class="comhead"> (planscope.io) </span></td></tr><tr><td colspan=2></td><td 
class="subtext"><span id=score_5673339>173 points</span> by <a href="user?id=sherm8n">sherm8n</a> 6 hours ago | <a href="item?id=5673339">54 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">6.</td><td><center><a id=up_5674510 href="vote?for=5674510&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674510></span></center></td><td class="title"><a href="https://github.com/mDialog/scala-zeromq">scala-zeromq - thread-safe ZeroMQ sockets for Scala</a><span class="comhead"> (github.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674510>24 points</span> by <a href="user?id=ninjakeyboard">ninjakeyboard</a> 1 hour ago | <a href="item?id=5674510">1 comment</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">7.</td><td><center><a id=up_5674438 href="vote?for=5674438&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674438></span></center></td><td class="title"><a href="http://www.madinamerica.com/2012/02/why-anti-authoritarians-are-diagnosed-as-mentally-ill/">Why Anti-Authoritarians are Diagnosed as Mentally Ill (2012)</a><span class="comhead"> (madinamerica.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674438>24 points</span> by <a href="user?id=yesbabyyes">yesbabyyes</a> 1 hour ago | <a href="item?id=5674438">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">8.</td><td><center><a id=up_5673948 href="vote?for=5673948&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673948></span></center></td><td class="title"><a href="http://www.maths.manchester.ac.uk/~jm/Choreographies/">Planar Choreographies: odd orbital mechanics</a><span class="comhead"> (manchester.ac.uk) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span 
id=score_5673948>51 points</span> by <a href="user?id=ColinWright">ColinWright</a> 3 hours ago | <a href="item?id=5673948">13 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">9.</td><td><center><a id=up_5674838 href="vote?for=5674838&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674838></span></center></td><td class="title"><a href="http://baohaojun.github.io/blog/2013/05/08/stripping-kernel-code-for-reading.html">Stripping kernel/uboot source to 10% for code reading</a><span class="comhead"> (baohaojun.github.io) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674838>10 points</span> by <a href="user?id=baohaojun">baohaojun</a> 51 minutes ago | <a href="item?id=5674838">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">10.</td><td><center><a id=up_5673356 href="vote?for=5673356&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673356></span></center></td><td class="title"><a href="http://blog.bitops.com/blog/2013/05/01/unreal-javascript/">Unreal JavaScript</a><span class="comhead"> (bitops.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673356>100 points</span> by <a href="user?id=ndr">ndr</a> 6 hours ago | <a href="item?id=5673356">49 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">11.</td><td><center><a id=up_5673544 href="vote?for=5673544&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673544></span></center></td><td class="title"><a href="http://sidekicksrc.com/post/if-you-love-automation-why-is-your-development-environment-manual/">If you love automation, why is your development environment manual?</a><span class="comhead"> (sidekicksrc.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span 
id=score_5673544>76 points</span> by <a href="user?id=timruffles">timruffles</a> 5 hours ago | <a href="item?id=5673544">63 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">12.</td><td><center><a id=up_5674279 href="vote?for=5674279&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674279></span></center></td><td class="title"><a href="http://www.mobilesort.com/blog/ios-sale-numbers-by-app-store-rank.html">iOS Sale Numbers By App Store Rank</a><span class="comhead"> (mobilesort.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674279>17 points</span> by <a href="user?id=chrisa">chrisa</a> 2 hours ago | <a href="item?id=5674279">7 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">13.</td><td><center><a id=up_5674150 href="vote?for=5674150&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674150></span></center></td><td class="title"><a href="http://minimaxir.com/2013/05/stones-of-jordan/">Diablo III Economy Broken by an Integer Overflow Bug</a><span class="comhead"> (minimaxir.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674150>58 points</span> by <a href="user?id=minimaxir">minimaxir</a> 2 hours ago | <a href="item?id=5674150">61 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">14.</td><td><center><a id=up_5670719 href="vote?for=5670719&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5670719></span></center></td><td class="title"><a href="https://www.eff.org/deeplinks/2013/05/why-isnt-gatsby-public-domain">Why Isn't Gatsby in the Public Domain?</a><span class="comhead"> (eff.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5670719>526 points</span> by <a 
href="user?id=ninthfrank07">ninthfrank07</a> 19 hours ago | <a href="item?id=5670719">159 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">15.</td><td><center><a id=up_5672515 href="vote?for=5672515&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672515></span></center></td><td class="title"><a href="http://www.flattestroute.com/">Show HN: Flattest Route - A web app to help you avoid hills in SF</a><span class="comhead"> (flattestroute.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672515>200 points</span> by <a href="user?id=jonny_eh">jonny_eh</a> 11 hours ago | <a href="item?id=5672515">67 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">16.</td><td><center><a id=up_5674263 href="vote?for=5674263&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674263></span></center></td><td class="title"><a href="http://www.pewsocialtrends.org/2013/05/07/gun-homicide-rate-down-49-since-1993-peak-public-unaware/">Gun Homicide Rate Down 49% Since 1993 Peak; Public Unaware</a><span class="comhead"> (pewsocialtrends.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674263>141 points</span> by <a href="user?id=krg">krg</a> 2 hours ago | <a href="item?id=5674263">173 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">17.</td><td><center><a id=up_5674962 href="vote?for=5674962&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674962></span></center></td><td class="title"><a href="http://weathersignal.com/" rel="nofollow">Pocket Meteorology: Using Android Phones to Crowdsource the Weather</a><span class="comhead"> (weathersignal.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674962>5 points</span> by <a 
href="user?id=exap">exap</a> 30 minutes ago | <a href="item?id=5674962">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">18.</td><td><center><a id=up_5675038 href="vote?for=5675038&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5675038></span></center></td><td class="title"><a href="http://www.allanberger.com/post/49938151321/how-to-make-your-apps-retina-ready" rel="nofollow">How to make your Apps Retina ready</a><span class="comhead"> (allanberger.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5675038>4 points</span> by <a href="user?id=andreasklinger">andreasklinger</a> 16 minutes ago | <a href="item?id=5675038">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">19.</td><td><center><a id=up_5673480 href="vote?for=5673480&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673480></span></center></td><td class="title"><a href="http://thecodeartist.blogspot.com/2013/05/sensors-on-google-glass.html">Sensors on Google Glass</a><span class="comhead"> (thecodeartist.blogspot.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673480>44 points</span> by <a href="user?id=cvs268">cvs268</a> 5 hours ago | <a href="item?id=5673480">17 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">20.</td><td><center><a id=up_5673032 href="vote?for=5673032&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673032></span></center></td><td class="title"><a href="https://github.com/d11wtq/boris">Boris: A tiny but robust REPL for PHP</a><span class="comhead"> (github.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673032>90 points</span> by <a href="user?id=nodesocket">nodesocket</a> 8 hours ago | <a 
href="item?id=5673032">36 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">21.</td><td><center><a id=up_5674755 href="vote?for=5674755&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674755></span></center></td><td class="title"><a href="http://www.washingtonpost.com/local/education/free-online-textbooks-with-conditions/2013/05/07/b49364ce-b761-11e2-92f3-f291801936b8_story.html?hpid=z9">Coursera to offer students free online textbooks, with conditions</a><span class="comhead"> (washingtonpost.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674755>6 points</span> by <a href="user?id=sonabinu">sonabinu</a> 1 hour ago | <a href="item?id=5674755">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">22.</td><td></td><td class="title"><a href="item?id=5674229">Join us and disrupt one of the world's worst monopolies</a></td></tr><tr><td colspan=2></td><td class="subtext">2 hours ago</td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">23.</td><td><center><a id=up_5673463 href="vote?for=5673463&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5673463></span></center></td><td class="title"><a href="http://phoboslab.org/log/2013/05/mpeg1-video-decoder-in-javascript">MPEG1 Video Decoder in JavaScript</a><span class="comhead"> (phoboslab.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5673463>39 points</span> by <a href="user?id=phoboslab">phoboslab</a> 5 hours ago | <a href="item?id=5673463">11 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">24.</td><td><center><a id=up_5671652 href="vote?for=5671652&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5671652></span></center></td><td class="title"><a 
href="https://twitter.com/id_aa_carmack/status/331918309916295168">John Carmack starting port of Wolf 3D in Haskell</a><span class="comhead"> (twitter.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5671652>274 points</span> by <a href="user?id=bobfunk">bobfunk</a> 16 hours ago | <a href="item?id=5671652">108 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">25.</td><td><center><a id=up_5674417 href="vote?for=5674417&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674417></span></center></td><td class="title"><a href="http://www.d2.vu/">Demonoid returns, thanks to community hosting.</a><span class="comhead"> (d2.vu) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674417>7 points</span> by <a href="user?id=bichiliad">bichiliad</a> 1 hour ago | <a href="item?id=5674417">2 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">26.</td><td><center><a id=up_5672875 href="vote?for=5672875&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672875></span></center></td><td class="title"><a href="http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html">A regular expression matcher By Rob Pike and Brian Kernighan (2007)</a><span class="comhead"> (princeton.edu) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672875>78 points</span> by <a href="user?id=sid6376">sid6376</a> 9 hours ago | <a href="item?id=5672875">34 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">27.</td><td><center><a id=up_5672354 href="vote?for=5672354&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5672354></span></center></td><td class="title"><a href="http://nemaload.davidad.org/">Show HN: My friend's project to simulate an 
entire C. Elegans</a><span class="comhead"> (davidad.org) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5672354>127 points</span> by <a href="user?id=SlyShy">SlyShy</a> 12 hours ago | <a href="item?id=5672354">41 comments</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">28.</td><td><center><a id=up_5674550 href="vote?for=5674550&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674550></span></center></td><td class="title"><a href="http://blog.mortardata.com/post/49934459499/recommender-systems-for-free">How to get Hilary Mason to build your recommender for free</a><span class="comhead"> (mortardata.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674550>34 points</span> by <a href="user?id=kky">kky</a> 1 hour ago | <a href="item?id=5674550">discuss</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">29.</td><td><center><a id=up_5674434 href="vote?for=5674434&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5674434></span></center></td><td class="title"><a href="http://mailman.ravenbrook.com/pipermail/mps-discussion/2013-May/000128.html">First release of MPS GC with full documentation (1.111.0)</a><span class="comhead"> (ravenbrook.com) </span></td></tr><tr><td colspan=2></td><td class="subtext"><span id=score_5674434>6 points</span> by <a href="user?id=BruceM">BruceM</a> 1 hour ago | <a href="item?id=5674434">1 comment</a></td></tr><tr style="height:5px"></tr><tr><td align=right valign=top class="title">30.</td><td><center><a id=up_5668374 href="vote?for=5668374&dir=up&whence=%6e%65%77%73"><img src="grayarrow.gif" border=0 vspace=3 hspace=2></a><span id=down_5668374></span></center></td><td class="title"><a href="http://jonobr1.github.io/two.js">Two.js</a><span class="comhead"> (jonobr1.github.io) </span></td></tr><tr><td 
colspan=2></td><td class="subtext"><span id=score_5668374>466 points</span> by <a href="user?id=jgv">jgv</a> 1 day ago | <a href="item?id=5668374">61 comments</a></td></tr><tr style="height:5px"></tr><tr style="height:10px"></tr><tr><td colspan=2></td><td class="title"><a href="news2">More</a></td></tr></table></td></tr><tr><td><img src="s.gif" height=10 width=0><table width="100%" cellspacing=0 cellpadding=1><tr><td bgcolor=#ff6600></td></tr></table><br>
22
+ <center><span class="yclinks"><a href="lists">Lists</a> | <a href="rss">RSS</a> | <a href="http://ycombinator.com/bookmarklet.html">Bookmarklet</a> | <a href="http://ycombinator.com/newsguidelines.html">Guidelines</a> | <a href="http://ycombinator.com/newsfaq.html">FAQ</a> | <a href="dmca.html">DMCA</a> | <a href="http://ycombinator.com/newsnews.html">News News</a> | <a href="item?id=363">Feature Requests</a> | <a href="http://ycombinator.com">Y Combinator</a> | <a href="http://ycombinator.com/apply.html">Apply</a> | <a href="http://ycombinator.com/lib.html">Library</a></span><br><br>
23
+ <form method=get action="//www.hnsearch.com/search#request/all">Search: <input type=text name="q" value="" size=17></form><br>
24
+ </center></td></tr></table></center></body></html>
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ # Specs for RubyTikaApp: each example builds an instance for a fixture in
6
+ # spec/docs (or a URL on the local test server) and compares a fixed slice
7
+ # of the converted output with a known snippet.
8
+ describe RubyTikaApp do
9
+   before(:each) do
10
+     # Resolve fixtures relative to this file; __dir__ matches the convention
11
+     # already used by spec/support/test_server.rb.
12
+     doc_path = File.join(__dir__, 'docs')
13
+
14
+     @test_file = "#{doc_path}/graph sampling simplex - 11.pdf"
15
+
16
+     @cnn_com_file = "#{doc_path}/cnn.com"
17
+     @news_ycombinator_com_file = "#{doc_path}/news.ycombinator.com"
18
+   end
19
+
20
+   describe 'Error' do
21
+     it 'has an error' do
22
+       expect do
23
+         rta = RubyTikaApp.new('No file')
24
+         rta.to_xml
25
+       end.to raise_error(RuntimeError)
26
+     end
27
+   end
28
+
29
+   describe 'CommandFailedError' do
30
+     it 'is raised correctly' do
31
+       expect do
32
+         rta = RubyTikaApp.new('/file_not_found.pdf')
33
+         rta.to_text
34
+       end.to raise_error(RubyTikaApp::CommandFailedError)
35
+     end
36
+   end
37
+
38
+   # The examples below pin exact output slices of the PDF fixture; note that
39
+   # [a..b] ranges are inclusive while [a...b] ranges exclude the end index.
40
+   describe '#to_xml' do
41
+     it 'header' do
42
+       rta = RubyTikaApp.new(@test_file)
43
+       expect(rta.to_xml[0..37]).to eq('<?xml version="1.0" encoding="UTF-8"?>')
44
+     end
45
+
46
+     it 'middle' do
47
+       rta = RubyTikaApp.new(@test_file)
48
+       xml = rta.to_xml
49
+
50
+       xml_size = xml.size / 2
51
+
52
+       expect(xml[xml_size..(xml_size + 100)]).to eq("pply USDSG, we\nneed to change a directed graph Gd to a symmetric graph\nG. This methodology is also us")
53
+     end
54
+   end
55
+
56
+   describe '#to_html' do
57
+     it 'header' do
58
+       rta = RubyTikaApp.new(@test_file)
59
+       expect(rta.to_html[0..42]).to eq('<html xmlns="http://www.w3.org/1999/xhtml">')
60
+     end
61
+
62
+     it 'middle' do
63
+       rta = RubyTikaApp.new(@test_file)
64
+       expect(rta.to_html[1000...1100]).to eq("nfo:modified\" content=\"2011-03-29T13:00:16Z\"/>\n<meta name=\"meta:save-date\" content=\"2011-03-29T13:00")
65
+     end
66
+   end
67
+
68
+   describe '#to_json' do
69
+     it 'header' do
70
+       rta = RubyTikaApp.new(@test_file)
71
+       expect(rta.to_json[0..42]).to eq('{"Application":"\\u0027Certified by IEEE PDF')
72
+     end
73
+
74
+     it 'middle' do
75
+       rta = RubyTikaApp.new(@test_file)
76
+       expect(rta.to_json[100...150]).to eq('"171510","Content-Type":"application/pdf","Creatio')
77
+     end
78
+   end
79
+
80
+   describe '#to_text' do
81
+     it 'header' do
82
+       rta = RubyTikaApp.new(@test_file)
83
+       expect(rta.to_text[0..42]).to eq("Understanding Graph Sampling Algorithms\nfor")
84
+     end
85
+
86
+     it 'middle' do
87
+       rta = RubyTikaApp.new(@test_file)
88
+       expect(rta.to_text[100...150]).to eq("in Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixi")
89
+     end
90
+   end
91
+
92
+   describe '#to_text_main' do
93
+     it 'header' do
94
+       rta = RubyTikaApp.new(@test_file)
95
+       expect(rta.to_text_main[0..42]).to eq('Understanding Graph Sampling Algorithms for')
96
+     end
97
+
98
+     it 'middle' do
99
+       rta = RubyTikaApp.new(@test_file)
100
+       expect(rta.to_text_main[100...150]).to eq("n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing")
101
+     end
102
+   end
103
+
104
+   describe '#to_metadata' do
105
+     it 'header' do
106
+       rta = RubyTikaApp.new(@test_file)
107
+       expect(rta.to_metadata[0..42]).to eq("Application: 'Certified by IEEE PDFeXpress ")
108
+     end
109
+
110
+     it 'middle' do
111
+       rta = RubyTikaApp.new(@test_file)
112
+       expect(rta.to_metadata[100...150]).to eq("Type: application/pdf\nCreation-Date: 2011-03-29T12")
113
+     end
114
+   end
115
+
116
+   # Fetching a fixture through the local test server (port 9299) must yield
117
+   # the same text as reading the same fixture from disk.
118
+   describe 'external URLs' do
119
+     it 'should be able to parse an http url' do
120
+       # Convert once and reuse the result for both expectations.
121
+       text = RubyTikaApp.new('http://localhost:9299/cnn.com').to_text
122
+       expect(text).to_not be_nil
123
+       expect(text).to eq(RubyTikaApp.new(@cnn_com_file).to_text)
124
+     end
125
+
126
+     it 'should be able to parse another http url' do
127
+       # Convert once and reuse the result for both expectations.
128
+       text = RubyTikaApp.new('http://localhost:9299/news.ycombinator.com').to_text
129
+       expect(text).to_not be_nil
130
+       expect(text).to eq(RubyTikaApp.new(@news_ycombinator_com_file).to_text)
131
+     end
132
+   end
133
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'simplecov'
4
+ SimpleCov.start
5
+
6
+ require 'rubygems'
7
+ require 'bundler/setup'
8
+
9
+ require 'ruby_tika_app'
10
+ require 'rspec'
11
+
12
+ # Include all files under spec/support
13
+ Dir['./spec/support/**/*.rb'].sort.each { |f| require f }
14
+
15
+ # Start a local rack server to serve up test pages.
16
+ @server_thread = Thread.new do
17
+ Rack::Handler::Thin.run(MyApp::Test::Server.new, Port: 9299, Host: '127.0.0.1')
18
+ end
19
+
20
+ sleep(1) # wait a sec for the server to be booted
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems'
4
+ require 'rack'
5
+
6
+ module MyApp
7
+ module Test
8
+ class Server
9
+ def call(env)
10
+ @root = "#{__dir__}/../docs/"
11
+ path = Rack::Utils.unescape(env['PATH_INFO'])
12
+ path += 'index.html' if path == '/'
13
+ file = @root + path.to_s
14
+
15
+ if File.exist?(file)
16
+ [200, { 'Content-Type' => 'text/html' }, File.read(file)]
17
+ else
18
+ [404, { 'Content-Type' => 'text/plain' }, 'file not found']
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
metadata ADDED
@@ -0,0 +1,192 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby_tika_app_lambda
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.25.0
5
+ platform: ruby
6
+ authors:
7
+ - Chris Parker
8
+ - Eric Musgrove
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2024-12-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: open4
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: bundler
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: 1.0.15
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: 1.0.15
42
+ - !ruby/object:Gem::Dependency
43
+ name: json
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: pry
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: rack
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: rake
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ - !ruby/object:Gem::Dependency
99
+ name: rspec
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: 3.9.0
105
+ type: :development
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: 3.9.0
112
+ - !ruby/object:Gem::Dependency
113
+ name: simplecov
114
+ requirement: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ type: :development
120
+ prerelease: false
121
+ version_requirements: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: thin
128
+ requirement: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ type: :development
134
+ prerelease: false
135
+ version_requirements: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
140
+ description: Wrapper around the tika-app jar
141
+ email:
142
+ - mrcsparker@gmail.com
143
+ - eric.musgrove@stoatlabs.com
144
+ executables: []
145
+ extensions: []
146
+ extra_rdoc_files: []
147
+ files:
148
+ - ".gitignore"
149
+ - ".rspec"
150
+ - Gemfile
151
+ - HISTORY
152
+ - LICENSE
153
+ - README.md
154
+ - Rakefile
155
+ - ext/tika-config.xml
156
+ - lib/ruby_tika_app.rb
157
+ - ruby_tika_app.gemspec
158
+ - spec/docs/cnn.com
159
+ - spec/docs/graph sampling simplex - 11.pdf
160
+ - spec/docs/news.ycombinator.com
161
+ - spec/ruby_tika_app_spec.rb
162
+ - spec/spec_helper.rb
163
+ - spec/support/test_server.rb
164
+ homepage: https://github.com/StoatLabs/ruby_tika_app
165
+ licenses: []
166
+ metadata: {}
167
+ post_install_message:
168
+ rdoc_options: []
169
+ require_paths:
170
+ - lib
171
+ required_ruby_version: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ required_rubygems_version: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ requirements: []
182
+ rubygems_version: 3.5.22
183
+ signing_key:
184
+ specification_version: 4
185
+ summary: Wrapper around the tika-app jar
186
+ test_files:
187
+ - spec/docs/cnn.com
188
+ - spec/docs/graph sampling simplex - 11.pdf
189
+ - spec/docs/news.ycombinator.com
190
+ - spec/ruby_tika_app_spec.rb
191
+ - spec/spec_helper.rb
192
+ - spec/support/test_server.rb