RubyGems - recordsearch - Versions diffs - 1.0.0 - Mend

recordsearch 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

data/History.txt +12 -0
data/License.txt +340 -0
data/Manifest.txt +27 -0
data/README.txt +21 -0
data/Rakefile +4 -0
data/config/hoe.rb +70 -0
data/config/requirements.rb +17 -0
data/lib/recordsearch/data_source.rb +32 -0
data/lib/recordsearch/indexer.rb +28 -0
data/lib/recordsearch/search.rb +96 -0
data/lib/recordsearch/version.rb +9 -0
data/lib/recordsearch.rb +3 -0
data/log/debug.log +0 -0
data/script/destroy +14 -0
data/script/generate +14 -0
data/script/txt2html +74 -0
data/setup.rb +1585 -0
data/tasks/deployment.rake +27 -0
data/tasks/environment.rake +7 -0
data/tasks/website.rake +17 -0
data/test/test_helper.rb +21 -0
data/test/test_index.rb +14 -0
data/test/test_recordsearch.rb +40 -0
data/website/index.html +174 -0
data/website/index.txt +97 -0
data/website/javascripts/rounded_corners_lite.inc.js +285 -0
data/website/stylesheets/screen.css +138 -0
data/website/template.rhtml +53 -0
metadata +79 -0

data/tasks/deployment.rake ADDED Viewed

@@ -0,0 +1,27 @@
+desc 'Release the website and new gem version'
+task :deploy => [:check_version, :website, :release] do
+  puts "Remember to create SVN tag:"
+  puts "svn copy svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk " +
+    "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
+  puts "Suggested comment:"
+  puts "Tagging release #{CHANGES}"
+end
+desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
+task :local_deploy => [:website_generate, :install_gem]
+task :check_version do
+  unless ENV['VERSION']
+    puts 'Must pass a VERSION=x.y.z release version'
+    exit
+  end
+  unless ENV['VERSION'] == VERS
+    puts "Please update your version.rb to match the release version, currently #{VERS}"
+    exit
+  end
+end
+desc 'Install the package as a gem, without generating documentation(ri/rdoc)'
+task :install_gem_no_doc => [:clean, :package] do
+  sh "#{'sudo ' unless Hoe::WINDOZE }gem install pkg/*.gem --no-rdoc --no-ri"
+end

data/tasks/environment.rake ADDED Viewed

@@ -0,0 +1,7 @@
+task :ruby_env do
+  RUBY_APP = if RUBY_PLATFORM =~ /java/
+    "jruby"
+  else
+    "ruby"
+  end unless defined? RUBY_APP
+end

data/tasks/website.rake ADDED Viewed

@@ -0,0 +1,17 @@
+desc 'Generate website files'
+task :website_generate => :ruby_env do
+  (Dir['website/**/*.txt'] - Dir['website/version*.txt']).each do |txt|
+    sh %{ #{RUBY_APP} script/txt2html #{txt} > #{txt.gsub(/txt$/,'html')} }
+  end
+end
+desc 'Upload website files to rubyforge'
+task :website_upload do
+  host = "#{rubyforge_username}@rubyforge.org"
+  remote_dir = "/var/www/gforge-projects/#{RUBYFORGE_PROJECT}/"
+  local_dir = 'website'
+  sh %{rsync -aCv #{local_dir}/ #{host}:#{remote_dir}}
+end
+desc 'Generate and upload website files'
+task :website => [:website_generate, :website_upload, :publish_docs, :publish_coverage]

data/test/test_helper.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require 'test/unit'
+require File.dirname(__FILE__) + '/../lib/recordsearch'
+class TestDataSource < RecordSearch::DataSource
+  def initialize(max=100)
+    @max = max
+    @cur = 0
+  end
+  def next
+    if @cur < @max
+      @cur += 1
+      self.class.format(@cur - 1)
+    end
+  end
+  def self.format(x)
+      #TODO @max should have 10 digits or less
+      "%010d\n" % x
+  end
+end

data/test/test_index.rb ADDED Viewed

@@ -0,0 +1,14 @@
+require File.dirname(__FILE__) + '/test_helper.rb'
+class TestIndex < Test::Unit::TestCase
+  def test_index
+    db_fname = 'tmp/list'
+    begin
+      RecordSearch.index(TestDataSource.new, db_fname)
+    ensure
+      File.delete(db_fname)
+      File.delete(db_fname + '.idx')
+    end
+  end
+end

data/test/test_recordsearch.rb ADDED Viewed

@@ -0,0 +1,40 @@
+require File.dirname(__FILE__) + '/test_helper.rb'
+class TestRecordSearch < Test::Unit::TestCase
+  def test_search
+    100.times { |x| assert_search(x) }
+  end
+  private
+  def assert_search(max)
+    db_fname = 'tmp/test_search'
+    begin
+      RecordSearch.index(TestDataSource.new(max), db_fname)
+      bs = TestSearch.new(db_fname)
+      max.times do |x|
+        value = TestDataSource.format(x)
+        assert_equal value, bs.search(value)
+      end
+    ensure
+      File.delete(db_fname)
+      File.delete(db_fname + '.idx')
+    end
+  end
+end
+class TestSearch < RecordSearch::Search
+  def parse(what)
+    what
+  end
+  def gt(a, b)
+    a > b
+  end
+  def eq(a, b)
+    a == b
+  end
+end

data/website/index.html ADDED Viewed

@@ -0,0 +1,174 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <link rel="stylesheet" href="stylesheets/screen.css" type="text/css" media="screen" />
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+  <title>
+      Text Record Search
+  </title>
+  <script src="javascripts/rounded_corners_lite.inc.js" type="text/javascript"></script>
+<style>
+</style>
+  <script type="text/javascript">
+    window.onload = function() {
+      settings = {
+          tl: { radius: 10 },
+          tr: { radius: 10 },
+          bl: { radius: 10 },
+          br: { radius: 10 },
+          antiAlias: true,
+          autoPad: true,
+          validTags: ["div"]
+      }
+      var versionBox = new curvyCorners(settings, document.getElementById("version"));
+      versionBox.applyCornersToAll();
+    }
+  </script>
+</head>
+<body>
+<div id="main">
+    <h1>Text Record Search</h1>
+    <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/recordsearch"; return false'>
+      <p>Get Version</p>
+      <a href="http://rubyforge.org/projects/recordsearch" class="numbers">1.0.0</a>
+    </div>
+    <h1>&#x2192; &#8216;recordsearch&#8217;</h1>
+	<h2>What</h2>
+	<p>With <code>recordsearch</code> you can do binary searches on a text file. A few weeks after participating in the <a href="http://www.rubyquiz.com/quiz139.html">Ruby Quiz #139</a> I found myself needing something similar in a project I was working on, so I created this library.</p>
+	<h2>Installing</h2>
+	<p><pre class='syntax'><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">recordsearch</span></pre></p>
+	<p>Or download the gem and install manually.</p>
+	<h2>The basics</h2>
+	<p>The approach <code>recordsearch</code> uses is to preprocess the text file, generating an index and storing it in a file, before any searching can begin. It&#8217;s assumed the records are of variable size; it would not make much sense to use an index if you have records of fixed size. After the index is created you can begin to do searches.</p>
+	<p>Bear in mind I created this library to suit my needs, so, for example, when the index is being created, the contents of the original file is used to create another as I needed to transform the original file.</p>
+	<h2>Demonstration of usage</h2>
+	<p>Suppose we have a ~40MB file with a list of inflected words and their corresponding <a href="http://en.wikipedia.org/wiki/Lemma_%28linguistics%29">lemma</a> in <i>big_file.txt</i>. Each line of the file has the format <i>&#8220;inflected_word  lemma&#8221;</i>.</p>
+	<p>First we require the needed files:</p>
+	<p><pre class='syntax'>
+<span class="ident">require</span> <span class="punct">'</span><span class="string">rubygems</span><span class="punct">'</span>
+<span class="ident">require</span> <span class="punct">'</span><span class="string">recordsearch</span><span class="punct">'</span>
+</pre></p>
+	<h3>Creating the index</h3>
+	<p>We have to create the index first, to do this, we need a subclass of <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/DataSource.html">RecordSearch::DataSource</a> . In this example we are using a <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/FileDataSource.html">RecordSearch::FileDataSource</a> which only reads each line from the file.</p>
+	<p><pre class='syntax'>
+<span class="ident">data_source</span> <span class="punct">=</span> <span class="constant">RecordSearch</span><span class="punct">::</span><span class="constant">FileDataSource</span><span class="punct">.</span><span class="ident">new</span><span class="punct">('</span><span class="string">big_file.txt</span><span class="punct">')</span>
+<span class="constant">RecordSearch</span><span class="punct">::</span><span class="ident">index</span><span class="punct">(</span><span class="ident">data_source</span><span class="punct">,</span> <span class="punct">'</span><span class="string">searchable_file</span><span class="punct">')</span>
+</pre></p>
+	<p>After this, <i>searchable_file</i> and <i>searchable_file.idx</i> have been created. This is one of the quirks the library has, if you don&#8217;t need to change the contents of <i>big_file.txt</i> you end up with <i>searchable_file</i> which is an exact duplicate of the original.</p>
+	<h3>Searching</h3>
+	<p>To search we need to subclass <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/Search.html">RecordSearch::Search</a> and implement the methods <code>parse</code>, <code>gt</code> and <code>eq</code>. In this example <i>parse</i> reads a line and splits it into an array, with its first element being the inflected word and the second element the lemma.</p>
+	<p><pre class='syntax'>
+<span class="keyword">class </span><span class="class">MySearch</span> <span class="punct">&lt;</span> <span class="constant">RecordSearch</span><span class="punct">::</span><span class="constant">Search</span>
+  <span class="keyword">def </span><span class="method">initialize</span>
+    <span class="keyword">super</span><span class="punct">('</span><span class="string">searchable_file</span><span class="punct">')</span>
+  <span class="keyword">end</span>
+  <span class="keyword">def </span><span class="method">parse</span><span class="punct">(</span><span class="ident">what</span><span class="punct">)</span>
+    <span class="ident">what</span><span class="punct">.</span><span class="ident">chomp</span><span class="punct">.</span><span class="ident">split</span>
+  <span class="keyword">end</span>
+  <span class="keyword">def </span><span class="method">gt</span><span class="punct">(</span><span class="ident">a</span><span class="punct">,</span> <span class="ident">b</span><span class="punct">)</span>
+    <span class="ident">a</span><span class="punct">[</span><span class="number">0</span><span class="punct">]</span> <span class="punct">&gt;</span> <span class="ident">b</span>
+  <span class="keyword">end</span>
+  <span class="keyword">def </span><span class="method">eq</span><span class="punct">(</span><span class="ident">a</span><span class="punct">,</span> <span class="ident">b</span><span class="punct">)</span>
+    <span class="ident">a</span><span class="punct">[</span><span class="number">0</span><span class="punct">]</span> <span class="punct">==</span> <span class="ident">b</span>
+  <span class="keyword">end</span>
+<span class="keyword">end</span>
+</pre></p>
+	<p>And we search like this:
+<pre class='syntax'>
+<span class="ident">my_search</span> <span class="punct">=</span> <span class="constant">MySearch</span><span class="punct">.</span><span class="ident">new</span>
+<span class="ident">my_search</span><span class="punct">.</span><span class="ident">search</span><span class="punct">('</span><span class="string">item</span><span class="punct">')</span>
+</pre></p>
+	<h2>Forum</h2>
+There are two forums:
+<ul>
+<li><a href="http://rubyforge.org/forum/forum.php?forum_id=19017">open-discussion</a></li>
+<li><a href="http://rubyforge.org/forum/forum.php?forum_id=19018">help</a></li>
+</ul>
+	<h2>Documentation</h2>
+	<p>The rdoc documentation is in the page <a href="http://recordsearch.rubyforge.org/rdoc">RecordSearch <span class="caps">API</span></a> and you can also see the <a href="http://recordsearch.rubyforge.org/coverage">test coverage report</a></p>
+	<h2>Source</h2>
+	<p>You can get the source using svn using this url <code>svn://rubyforge.org/var/svn/recordsearch/trunk</code> for anonymous access or go to the <a href="http://rubyforge.org/projects/recordsearch">RecordSearch</a> RubyForge page.</p>
+	<h2>License</h2>
+	<p>This code is free to use under the terms of the <span class="caps">GPL</span> license.</p>
+	<h2>Contact</h2>
+	<p>Comments are welcome. Send an email to <a href="mailto:lparravi@gmail.com">Luis Parravicini</a> or post a message to one of the available forums.</p>
+    <p class="coda">
+      <a href="http://ktulu.com.ar">Luis Parravicini</a>, 20th November 2007<br>
+      Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
+    </p>
+</div>
+<script src="http://www.google-analytics.com/urchin.js" type="text/javascript">
+</script>
+<script type="text/javascript">
+_uacct = "UA-2740686-4";
+urchinTracker();
+</script>
+</body>
+</html>

data/website/index.txt ADDED Viewed

@@ -0,0 +1,97 @@
+h1. Text Record Search
+h1. &#x2192; 'recordsearch'
+h2. What
+With <code>recordsearch</code> you can do binary searches on a text file. A few weeks after participating in the "Ruby Quiz #139":http://www.rubyquiz.com/quiz139.html I found myself needing something similar in a project I was working on, so I created this library.
+h2. Installing
+<pre syntax="ruby">sudo gem install recordsearch</pre>
+Or download the gem and install manually.
+h2. The basics
+The approach <code>recordsearch</code> uses is to preprocess the text file, generating an index and storing it in a file, before any searching can begin. It's assumed the records are of variable size; it would not make much sense to use an index if you have records of fixed size. After the index is created you can begin to do searches.
+Bear in mind I created this library to suit my needs, so, for example, when the index is being created, the contents of the original file is used to create another as I needed to transform the original file.
+h2. Demonstration of usage
+Suppose we have a ~40MB file with a list of inflected words and their corresponding "lemma":http://en.wikipedia.org/wiki/Lemma_%28linguistics%29 in <i>big_file.txt</i>. Each line of the file has the format <i>"inflected_word  lemma"</i>.
+First we require the needed files:
+<pre syntax="ruby">
+require 'rubygems'
+require 'recordsearch'
+</pre>
+h3. Creating the index
+We have to create the index first, to do this, we need a subclass of "RecordSearch::DataSource":http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/DataSource.html . In this example we are using a "RecordSearch::FileDataSource":http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/FileDataSource.html which only reads each line from the file.
+<pre syntax="ruby">
+data_source = RecordSearch::FileDataSource.new('big_file.txt')
+RecordSearch::index(data_source, 'searchable_file')
+</pre>
+After this, <i>searchable_file</i> and <i>searchable_file.idx</i> have been created. This is one of the quirks the library has, if you don't need to change the contents of <i>big_file.txt</i> you end up with <i>searchable_file</i> which is an exact duplicate of the original.
+h3. Searching
+To search we need to subclass "RecordSearch::Search":http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/Search.html and implement the methods <code>parse</code>, <code>gt</code> and <code>eq</code>. In this example <i>parse</i> reads a line and splits it into an array, with its first element being the inflected word and the second element the lemma.
+<pre syntax="ruby">
+class MySearch < RecordSearch::Search
+  def initialize
+    super('searchable_file')
+  end
+  def parse(what)
+    what.chomp.split
+  end
+  def gt(a, b)
+    a[0] > b
+  end
+  def eq(a, b)
+    a[0] == b
+  end
+end
+</pre>
+And we search like this:
+<pre syntax="ruby">
+my_search = MySearch.new
+my_search.search('item')
+</pre>
+h2. Forum
+There are two forums:
+<ul>
+<li>"open-discussion":http://rubyforge.org/forum/forum.php?forum_id=19017</li>
+<li>"help":http://rubyforge.org/forum/forum.php?forum_id=19018</li>
+</ul>
+h2. Documentation
+The rdoc documentation is in the page "RecordSearch API":http://recordsearch.rubyforge.org/rdoc and you can also see the "test coverage report":http://recordsearch.rubyforge.org/coverage
+h2. Source
+You can get the source using svn using this url <code>svn://rubyforge.org/var/svn/recordsearch/trunk</code> for anonymous access or go to the "RecordSearch":http://rubyforge.org/projects/recordsearch RubyForge page.
+h2. License
+This code is free to use under the terms of the GPL license.
+h2. Contact
+Comments are welcome. Send an email to "Luis Parravicini":mailto:lparravi@gmail.com or post a message to one of the available forums.