recordsearch 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,6 +1,6 @@
1
- == 1.0.0 2007-11-19
1
+ == 1.1.0 2009-09-30
2
2
 
3
- * renamed project to recordsearch, as there is another project already with the same name (binarysearch).
3
+ * changes to make it work on Ruby 1.9.1
4
4
 
5
5
  == 0.0.2 2007-10-25
6
6
 
data/README.txt CHANGED
@@ -1,4 +1,4 @@
1
- = Record Search 1.0
1
+ = Record Search 1.1
2
2
 
3
3
  == What is recordsearch?
4
4
 
@@ -1,7 +1,7 @@
1
1
  module RecordSearch
2
2
  # Builds an index file using as source data +data_source+, which can be a
3
3
  # subclass of RecordSearch::DataSource or an object with a next method.
4
- def self.index(data_source, db, db_idx=nil)
4
+ def self.index(data_source, db, db_idx=nil)
5
5
  db_idx = db + '.idx' if db_idx.nil?
6
6
  records = 0
7
7
 
@@ -1,5 +1,3 @@
1
- require 'readbytes'
2
-
3
1
  module RecordSearch
4
2
 
5
3
  class Search
@@ -61,11 +59,23 @@ module RecordSearch
61
59
  # Reads and parses the line at position +index+.
62
60
  def read(index)
63
61
  @db_idx.seek(index * INDEX_SIZE)
64
- pos = @db_idx.readbytes(INDEX_SIZE).unpack('L')[0]
62
+ pos = readbytes(INDEX_SIZE).unpack('L')[0]
65
63
  @db.seek(pos)
66
64
  parse(@db.gets)
67
65
  end
68
66
 
67
+ # my own version of IO#readbytes as 1.9 doesn't have it
68
+ def readbytes(n)
69
+ s = @db_idx.read(INDEX_SIZE)
70
+ if s.nil?
71
+ raise EOFError
72
+ elsif s.size != n
73
+ raise IOError, "truncated"
74
+ end
75
+
76
+ s
77
+ end
78
+
69
79
  # Parse a record read from the file. This method raises an exception.
70
80
  # You must implement this method to parse the file according to your needs.
71
81
  def parse(what)
@@ -1,7 +1,7 @@
1
1
  module RecordSearch
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
- MINOR = 0
4
+ MINOR = 1
5
5
  TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
data/script/destroy CHANGED
File without changes
data/script/generate CHANGED
File without changes
data/script/txt2html CHANGED
@@ -22,9 +22,9 @@ class Fixnum
22
22
  return 'th' if (10..19).include?(self % 100)
23
23
  # others
24
24
  case self % 10
25
- when 1: return 'st'
26
- when 2: return 'nd'
27
- when 3: return 'rd'
25
+ when 1 then return 'st'
26
+ when 2 then return 'nd'
27
+ when 3 then return 'rd'
28
28
  else return 'th'
29
29
  end
30
30
  end
data/website/index.html CHANGED
@@ -33,72 +33,34 @@
33
33
  <h1>Text Record Search</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/recordsearch"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/recordsearch" class="numbers">1.0.0</a>
36
+ <a href="http://rubyforge.org/projects/recordsearch" class="numbers">1.1.0</a>
37
37
  </div>
38
- <h1>&#x2192; &#8216;recordsearch&#8217;</h1>
39
-
40
-
41
- <h2>What</h2>
42
-
43
-
44
- <p>With <code>recordsearch</code> you can do binary searches on a text file. I created this library as a few weeks after participating in the <a href="http://www.rubyquiz.com/quiz139.html">Ruby Quiz #139</a> I found myself needing something similar in a project I was working on.</p>
45
-
46
-
47
- <h2>Installing</h2>
48
-
49
-
50
- <p><pre class='syntax'><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">recordsearch</span></pre></p>
51
-
52
-
53
- <p>Or download the gem and install manually.</p>
54
-
55
-
56
- <h2>The basics</h2>
57
-
58
-
59
- <p>The approach <code>recordsearch</code> uses is to pre process the text file to generate first an index and storing it in a file before being able to begin searching. It&#8217;s assumed the records are of variable size, it should not make much sense to use an index if you have records of fixed size. After the index is created you can begin to do searches.</p>
60
-
61
-
62
- <p>Bear in mind I created this library to suit my needs, so, for example, when the index is being created, the contents of the original file is used to create another as I needed to transform the original file.</p>
63
-
64
-
65
- <h2>Demonstration of usage</h2>
66
-
67
-
68
- <p>Suppose we have a ~40MB file with a list of inflected words and their corresponding <a href="http://en.wikipedia.org/wiki/Lemma_%28linguistics%29">lemma</a> in <i>big_file.txt</i>. Each line of the file has the format <i>&#8220;inflected_word lemma&#8221;</i>.</p>
69
-
70
-
71
- <p>First we require the needed files:</p>
72
-
73
-
74
- <p><pre class='syntax'>
38
+ <h1>&#8216;recordsearch&#8217;</h1>
39
+ <h2>What</h2>
40
+ <p>With <code>recordsearch</code> you can do binary searches on a text file. I created this library because, a few weeks after participating in <a href="http://www.rubyquiz.com/quiz139.html">Ruby Quiz #139</a>, I found myself needing something similar in a project I was working on.</p>
41
+ <h2>Installing</h2>
42
+ <p><pre class='syntax'><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">recordsearch</span></pre></p>
43
+ <p>Or download the gem and install manually.</p>
44
+ <h2>The basics</h2>
45
+ <p>The approach <code>recordsearch</code> uses is to preprocess the text file, generating an index and storing it in a file, before any searching can begin. It&#8217;s assumed the records are of variable size; it would not make much sense to use an index if you have records of fixed size. After the index is created you can begin to do searches.</p>
46
+ <p>Bear in mind I created this library to suit my needs, so, for example, when the index is being created, the contents of the original file is used to create another as I needed to transform the original file.</p>
47
+ <h2>Demonstration of usage</h2>
48
+ <p>Suppose we have a ~40MB file with a list of inflected words and their corresponding <a href="http://en.wikipedia.org/wiki/Lemma_%28linguistics%29">lemma</a> in <i>big_file.txt</i>. Each line of the file has the format <i>&#8220;inflected_word lemma&#8221;</i>.</p>
49
+ <p>First we require the needed files:</p>
50
+ <p><pre class='syntax'>
75
51
  <span class="ident">require</span> <span class="punct">'</span><span class="string">rubygems</span><span class="punct">'</span>
76
52
  <span class="ident">require</span> <span class="punct">'</span><span class="string">recordsearch</span><span class="punct">'</span>
77
53
  </pre></p>
78
-
79
-
80
- <h3>Creating the index</h3>
81
-
82
-
83
- <p>We have to create the index first, to do this, we need a subclass of <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/DataSource.html">RecordSearch::DataSource</a> . In this example we are using a <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/FileDataSource.html">RecordSearch::FileDataSource</a> which only reads each line from the file.</p>
84
-
85
-
86
- <p><pre class='syntax'>
54
+ <h3>Creating the index</h3>
55
+ <p>We have to create the index first, to do this, we need a subclass of <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/DataSource.html">RecordSearch::DataSource</a> . In this example we are using a <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/FileDataSource.html">RecordSearch::FileDataSource</a> which only reads each line from the file.</p>
56
+ <p><pre class='syntax'>
87
57
  <span class="ident">data_source</span> <span class="punct">=</span> <span class="constant">RecordSearch</span><span class="punct">::</span><span class="constant">FileDataSource</span><span class="punct">.</span><span class="ident">new</span><span class="punct">('</span><span class="string">big_file.txt</span><span class="punct">')</span>
88
58
  <span class="constant">RecordSearch</span><span class="punct">::</span><span class="ident">index</span><span class="punct">(</span><span class="ident">data_source</span><span class="punct">,</span> <span class="punct">'</span><span class="string">searchable_file</span><span class="punct">')</span>
89
59
  </pre></p>
90
-
91
-
92
- <p>After this, <i>searchable_file</i> and <i>searchable_file.idx</i> have been created. This is one of the quirks the library has, if you don&#8217;t need to change the contents of <i>big_file.txt</i> you end up with <i>searchable_file</i> which is an exact duplicate of the original.</p>
93
-
94
-
95
- <h3>Searching</h3>
96
-
97
-
98
- <p>To search we need to subclass <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/Search.html">RecordSearch::Search</a> and implement the methods <code>parse</code>, <code>gt</code> and <code>eq</code>. In this example <i>parse</i> reads a line, and split it in an array with it&#8217;s first element as the inflected word and the second element the lemma.</p>
99
-
100
-
101
- <p><pre class='syntax'>
60
+ <p>After this, <i>searchable_file</i> and <i>searchable_file.idx</i> have been created. This is one of the quirks the library has, if you don&#8217;t need to change the contents of <i>big_file.txt</i> you end up with <i>searchable_file</i> which is an exact duplicate of the original.</p>
61
+ <h3>Searching</h3>
62
+ <p>To search we need to subclass <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/Search.html">RecordSearch::Search</a> and implement the methods <code>parse</code>, <code>gt</code> and <code>eq</code>. In this example <i>parse</i> reads a line and splits it into an array whose first element is the inflected word and whose second element is the lemma.</p>
63
+ <p><pre class='syntax'>
102
64
  <span class="keyword">class </span><span class="class">MySearch</span> <span class="punct">&lt;</span> <span class="constant">RecordSearch</span><span class="punct">::</span><span class="constant">Search</span>
103
65
  <span class="keyword">def </span><span class="method">initialize</span>
104
66
  <span class="keyword">super</span><span class="punct">('</span><span class="string">searchable_file</span><span class="punct">')</span>
@@ -117,48 +79,27 @@
117
79
  <span class="keyword">end</span>
118
80
  <span class="keyword">end</span>
119
81
  </pre></p>
120
-
121
-
122
- <p>And we search like this:
82
+ <p>And we search like this:<br />
123
83
  <pre class='syntax'>
124
84
  <span class="ident">my_search</span> <span class="punct">=</span> <span class="constant">MySearch</span><span class="punct">.</span><span class="ident">new</span>
125
85
  <span class="ident">my_search</span><span class="punct">.</span><span class="ident">search</span><span class="punct">('</span><span class="string">item</span><span class="punct">')</span>
126
86
  </pre></p>
127
-
128
-
129
- <h2>Forum</h2>
130
-
131
-
132
- There are two forums:
133
- <ul>
134
- <li><a href="http://rubyforge.org/forum/forum.php?forum_id=19017">open-discussion</a></li>
135
- <li><a href="http://rubyforge.org/forum/forum.php?forum_id=19018">help</a></li>
87
+ <h2>Forum</h2>
88
+ <p>There are two forums:</p>
89
+ <ul>
90
+ <li><a href="http://rubyforge.org/forum/forum.php?forum_id=19017">open-discussion</a></li>
91
+ <li><a href="http://rubyforge.org/forum/forum.php?forum_id=19018">help</a></li>
136
92
  </ul>
137
-
138
- <h2>Documentacion</h2>
139
-
140
-
141
- <p>The rdoc documentation is in the page <a href="http://recordsearch.rubyforge.org/rdoc">RecordSearch <span class="caps">API</span></a> and you can also see the <a href="http://recordsearch.rubyforge.org/coverage">test coverage report</a></p>
142
-
143
-
144
- <h2>Source</h2>
145
-
146
-
147
- <p>You can get the source using svn using this url <code>svn://rubyforge.org/var/svn/recordsearch/trunk</code> for anonymous access or go the the <a href="http://rubyforge.org/projects/recordsearch">RecordSearch</a> RubyForge page.</p>
148
-
149
-
150
- <h2>License</h2>
151
-
152
-
153
- <p>This code is free to use under the terms of the <span class="caps">GPL</span> license.</p>
154
-
155
-
156
- <h2>Contact</h2>
157
-
158
-
159
- <p>Comments are welcome. Send an email to <a href="mailto:lparravi@gmail.com">Luis Parravicini</a> or post a message to one of the available forums.</p>
93
+ <h2>Documentation</h2>
94
+ <p>The rdoc documentation is in the page <a href="http://recordsearch.rubyforge.org/rdoc">RecordSearch <span class="caps">API</span></a> and you can also see the <a href="http://recordsearch.rubyforge.org/coverage">test coverage report</a></p>
95
+ <h2>Source</h2>
96
+ <p>You can get the source with svn from the url <code>svn://rubyforge.org/var/svn/recordsearch/trunk</code> for anonymous access, or go to the <a href="http://rubyforge.org/projects/recordsearch">RecordSearch</a> RubyForge page.</p>
97
+ <h2>License</h2>
98
+ <p>This code is free to use under the terms of the <span class="caps">GPL</span> license.</p>
99
+ <h2>Contact</h2>
100
+ <p>Comments are welcome. Send an email to <a href="mailto:lparravi@gmail.com">Luis Parravicini</a> or post a message to one of the available forums.</p>
160
101
  <p class="coda">
161
- <a href="http://ktulu.com.ar">Luis Parravicini</a>, 20th November 2007<br>
102
+ <a href="http://ktulu.com.ar">Luis Parravicini</a>, 1st October 2009<br>
162
103
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
163
104
  </p>
164
105
  </div>
data/website/index.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  h1. Text Record Search
2
2
 
3
- h1. &#x2192; 'recordsearch'
3
+ h1. 'recordsearch'
4
4
 
5
5
 
6
6
  h2. What
metadata CHANGED
@@ -1,33 +1,39 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.4
3
- specification_version: 1
4
2
  name: recordsearch
5
3
  version: !ruby/object:Gem::Version
6
- version: 1.0.0
7
- date: 2007-11-20 00:00:00 -03:00
8
- summary: Search on a text file using binary search
9
- require_paths:
10
- - lib
11
- email: lparravi@gmail.com
12
- homepage: http://recordsearch.rubyforge.org
13
- rubyforge_project: recordsearch
14
- description: Search on a text file using binary search
15
- autorequire:
16
- default_executable:
17
- bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
4
+ version: 1.1.0
25
5
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
6
  authors:
30
7
  - Luis Parravicini
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-01 00:00:00 -03:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 2.3.3
24
+ version:
25
+ description: Search on a text file using binary search
26
+ email: lparravi@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - History.txt
33
+ - License.txt
34
+ - Manifest.txt
35
+ - README.txt
36
+ - website/index.txt
31
37
  files:
32
38
  - History.txt
33
39
  - License.txt
@@ -56,24 +62,36 @@ files:
56
62
  - website/javascripts/rounded_corners_lite.inc.js
57
63
  - website/stylesheets/screen.css
58
64
  - website/template.rhtml
59
- test_files:
60
- - test/test_helper.rb
61
- - test/test_index.rb
62
- - test/test_recordsearch.rb
65
+ has_rdoc: true
66
+ homepage: http://recordsearch.rubyforge.org
67
+ licenses: []
68
+
69
+ post_install_message:
63
70
  rdoc_options:
64
71
  - --main
65
72
  - README.txt
66
- extra_rdoc_files:
67
- - History.txt
68
- - License.txt
69
- - Manifest.txt
70
- - README.txt
71
- - website/index.txt
72
- executables: []
73
-
74
- extensions: []
75
-
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: "0"
80
+ version:
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: "0"
86
+ version:
76
87
  requirements: []
77
88
 
78
- dependencies: []
79
-
89
+ rubyforge_project: recordsearch
90
+ rubygems_version: 1.3.5
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: Search on a text file using binary search
94
+ test_files:
95
+ - test/test_recordsearch.rb
96
+ - test/test_helper.rb
97
+ - test/test_index.rb