recordsearch 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +2 -2
- data/README.txt +1 -1
- data/lib/recordsearch/indexer.rb +1 -1
- data/lib/recordsearch/search.rb +13 -3
- data/lib/recordsearch/version.rb +1 -1
- data/script/destroy +0 -0
- data/script/generate +0 -0
- data/script/txt2html +3 -3
- data/website/index.html +36 -95
- data/website/index.txt +1 -1
- metadata +58 -40
data/History.txt
CHANGED
data/README.txt
CHANGED
data/lib/recordsearch/indexer.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module RecordSearch
|
2
2
|
# Builds an index file using as source data +data_source+, which can be a
|
3
3
|
# subclass of RecordSearch::DataSource or an object with a next method.
|
4
|
-
|
4
|
+
def self.index(data_source, db, db_idx=nil)
|
5
5
|
db_idx = db + '.idx' if db_idx.nil?
|
6
6
|
records = 0
|
7
7
|
|
data/lib/recordsearch/search.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'readbytes'
|
2
|
-
|
3
1
|
module RecordSearch
|
4
2
|
|
5
3
|
class Search
|
@@ -61,11 +59,23 @@ module RecordSearch
|
|
61
59
|
# Reads and parses the line at position +index+.
|
62
60
|
def read(index)
|
63
61
|
@db_idx.seek(index * INDEX_SIZE)
|
64
|
-
pos = @db_idx.readbytes(INDEX_SIZE).unpack('L')[0]
|
62
|
+
pos = readbytes(INDEX_SIZE).unpack('L')[0]
|
65
63
|
@db.seek(pos)
|
66
64
|
parse(@db.gets)
|
67
65
|
end
|
68
66
|
|
67
|
+
# my own version of IO#readbytes as 1.9 doesn't have it
|
68
|
+
def readbytes(n)
|
69
|
+
s = @db_idx.read(INDEX_SIZE)
|
70
|
+
if s.nil?
|
71
|
+
raise EOFError
|
72
|
+
elsif s.size != n
|
73
|
+
raise IOError, "truncated"
|
74
|
+
end
|
75
|
+
|
76
|
+
s
|
77
|
+
end
|
78
|
+
|
69
79
|
# Parse a record read from the file. This method raises an exception.
|
70
80
|
# You must implement this method to parse the file according to your needs.
|
71
81
|
def parse(what)
|
data/lib/recordsearch/version.rb
CHANGED
data/script/destroy
CHANGED
File without changes
|
data/script/generate
CHANGED
File without changes
|
data/script/txt2html
CHANGED
@@ -22,9 +22,9 @@ class Fixnum
|
|
22
22
|
return 'th' if (10..19).include?(self % 100)
|
23
23
|
# others
|
24
24
|
case self % 10
|
25
|
-
when 1: return 'st'
|
26
|
-
when 2: return 'nd'
|
27
|
-
when 3: return 'rd'
|
25
|
+
when 1 then return 'st'
|
26
|
+
when 2 then return 'nd'
|
27
|
+
when 3 then return 'rd'
|
28
28
|
else return 'th'
|
29
29
|
end
|
30
30
|
end
|
data/website/index.html
CHANGED
@@ -33,72 +33,34 @@
|
|
33
33
|
<h1>Text Record Search</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/recordsearch"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/recordsearch" class="numbers">1.0.0</a>
|
36
|
+
<a href="http://rubyforge.org/projects/recordsearch" class="numbers">1.1.0</a>
|
37
37
|
</div>
|
38
|
-
<h1>&#
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
<p>Or download the gem and install manually.</p>
|
54
|
-
|
55
|
-
|
56
|
-
<h2>The basics</h2>
|
57
|
-
|
58
|
-
|
59
|
-
<p>The approach <code>recordsearch</code> uses is to pre process the text file to generate first an index and storing it in a file before being able to begin searching. It’s assumed the records are of variable size, it should not make much sense to use an index if you have records of fixed size. After the index is created you can begin to do searches.</p>
|
60
|
-
|
61
|
-
|
62
|
-
<p>Bear in mind I created this library to suit my needs, so, for example, when the index is being created, the contents of the original file is used to create another as I needed to transform the original file.</p>
|
63
|
-
|
64
|
-
|
65
|
-
<h2>Demonstration of usage</h2>
|
66
|
-
|
67
|
-
|
68
|
-
<p>Suppose we have a ~40MB file with a list of inflected words and their corresponding <a href="http://en.wikipedia.org/wiki/Lemma_%28linguistics%29">lemma</a> in <i>big_file.txt</i>. Each line of the file has the format <i>“inflected_word lemma”</i>.</p>
|
69
|
-
|
70
|
-
|
71
|
-
<p>First we require the needed files:</p>
|
72
|
-
|
73
|
-
|
74
|
-
<p><pre class='syntax'>
|
38
|
+
<h1>‘recordsearch’</h1>
|
39
|
+
<h2>What</h2>
|
40
|
+
<p>With <code>recordsearch</code> you can do binary searches on a text file. I created this library as a few weeks after participating in the <a href="http://www.rubyquiz.com/quiz139.html">Ruby Quiz #139</a> I found myself needing something similar in a project I was working on.</p>
|
41
|
+
<h2>Installing</h2>
|
42
|
+
<p><pre class='syntax'><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">recordsearch</span></pre></p>
|
43
|
+
<p>Or download the gem and install manually.</p>
|
44
|
+
<h2>The basics</h2>
|
45
|
+
<p>The approach <code>recordsearch</code> uses is to pre process the text file to generate first an index and storing it in a file before being able to begin searching. It’s assumed the records are of variable size, it should not make much sense to use an index if you have records of fixed size. After the index is created you can begin to do searches.</p>
|
46
|
+
<p>Bear in mind I created this library to suit my needs, so, for example, when the index is being created, the contents of the original file is used to create another as I needed to transform the original file.</p>
|
47
|
+
<h2>Demonstration of usage</h2>
|
48
|
+
<p>Suppose we have a ~40MB file with a list of inflected words and their corresponding <a href="http://en.wikipedia.org/wiki/Lemma_%28linguistics%29">lemma</a> in <i>big_file.txt</i>. Each line of the file has the format <i>“inflected_word lemma”</i>.</p>
|
49
|
+
<p>First we require the needed files:</p>
|
50
|
+
<p><pre class='syntax'>
|
75
51
|
<span class="ident">require</span> <span class="punct">'</span><span class="string">rubygems</span><span class="punct">'</span>
|
76
52
|
<span class="ident">require</span> <span class="punct">'</span><span class="string">recordsearch</span><span class="punct">'</span>
|
77
53
|
</pre></p>
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
<p>We have to create the index first, to do this, we need a subclass of <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/DataSource.html">RecordSearch::DataSource</a> . In this example we are using a <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/FileDataSource.html">RecordSearch::FileDataSource</a> which only reads each line from the file.</p>
|
84
|
-
|
85
|
-
|
86
|
-
<p><pre class='syntax'>
|
54
|
+
<h3>Creating the index</h3>
|
55
|
+
<p>We have to create the index first, to do this, we need a subclass of <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/DataSource.html">RecordSearch::DataSource</a> . In this example we are using a <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/FileDataSource.html">RecordSearch::FileDataSource</a> which only reads each line from the file.</p>
|
56
|
+
<p><pre class='syntax'>
|
87
57
|
<span class="ident">data_source</span> <span class="punct">=</span> <span class="constant">RecordSearch</span><span class="punct">::</span><span class="constant">FileDataSource</span><span class="punct">.</span><span class="ident">new</span><span class="punct">('</span><span class="string">big_file.txt</span><span class="punct">')</span>
|
88
58
|
<span class="constant">RecordSearch</span><span class="punct">::</span><span class="ident">index</span><span class="punct">(</span><span class="ident">data_source</span><span class="punct">,</span> <span class="punct">'</span><span class="string">searchable_file</span><span class="punct">')</span>
|
89
59
|
</pre></p>
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
<h3>Searching</h3>
|
96
|
-
|
97
|
-
|
98
|
-
<p>To search we need to subclass <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/Search.html">RecordSearch::Search</a> and implement the methods <code>parse</code>, <code>gt</code> and <code>eq</code>. In this example <i>parse</i> reads a line, and split it in an array with it’s first element as the inflected word and the second element the lemma.</p>
|
99
|
-
|
100
|
-
|
101
|
-
<p><pre class='syntax'>
|
60
|
+
<p>After this, <i>searchable_file</i> and <i>searchable_file.idx</i> have been created. This is one of the quirks the library has, if you don’t need to change the contents of <i>big_file.txt</i> you end up with <i>searchable_file</i> which is an exact duplicate of the original.</p>
|
61
|
+
<h3>Searching</h3>
|
62
|
+
<p>To search we need to subclass <a href="http://recordsearch.rubyforge.org/rdoc/classes/RecordSearch/Search.html">RecordSearch::Search</a> and implement the methods <code>parse</code>, <code>gt</code> and <code>eq</code>. In this example <i>parse</i> reads a line, and split it in an array with it’s first element as the inflected word and the second element the lemma.</p>
|
63
|
+
<p><pre class='syntax'>
|
102
64
|
<span class="keyword">class </span><span class="class">MySearch</span> <span class="punct"><</span> <span class="constant">RecordSearch</span><span class="punct">::</span><span class="constant">Search</span>
|
103
65
|
<span class="keyword">def </span><span class="method">initialize</span>
|
104
66
|
<span class="keyword">super</span><span class="punct">('</span><span class="string">searchable_file</span><span class="punct">')</span>
|
@@ -117,48 +79,27 @@
|
|
117
79
|
<span class="keyword">end</span>
|
118
80
|
<span class="keyword">end</span>
|
119
81
|
</pre></p>
|
120
|
-
|
121
|
-
|
122
|
-
<p>And we search like this:
|
82
|
+
<p>And we search like this:<br />
|
123
83
|
<pre class='syntax'>
|
124
84
|
<span class="ident">my_search</span> <span class="punct">=</span> <span class="constant">MySearch</span><span class="punct">.</span><span class="ident">new</span>
|
125
85
|
<span class="ident">my_search</span><span class="punct">.</span><span class="ident">search</span><span class="punct">('</span><span class="string">item</span><span class="punct">')</span>
|
126
86
|
</pre></p>
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
There are two forums:
|
133
|
-
<ul>
|
134
|
-
<li><a href="http://rubyforge.org/forum/forum.php?forum_id=19017">open-discussion</a></li>
|
135
|
-
<li><a href="http://rubyforge.org/forum/forum.php?forum_id=19018">help</a></li>
|
87
|
+
<h2>Forum</h2>
|
88
|
+
<p>There are two forums:<br />
|
89
|
+
<ul><br />
|
90
|
+
<li><a href="http://rubyforge.org/forum/forum.php?forum_id=19017">open-discussion</a></li><br />
|
91
|
+
<li><a href="http://rubyforge.org/forum/forum.php?forum_id=19018">help</a></li></p>
|
136
92
|
</ul>
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
<p>You can get the source using svn using this url <code>svn://rubyforge.org/var/svn/recordsearch/trunk</code> for anonymous access or go the the <a href="http://rubyforge.org/projects/recordsearch">RecordSearch</a> RubyForge page.</p>
|
148
|
-
|
149
|
-
|
150
|
-
<h2>License</h2>
|
151
|
-
|
152
|
-
|
153
|
-
<p>This code is free to use under the terms of the <span class="caps">GPL</span> license.</p>
|
154
|
-
|
155
|
-
|
156
|
-
<h2>Contact</h2>
|
157
|
-
|
158
|
-
|
159
|
-
<p>Comments are welcome. Send an email to <a href="mailto:lparravi@gmail.com">Luis Parravicini</a> or post a message to one of the available forums.</p>
|
93
|
+
<h2>Documentacion</h2>
|
94
|
+
<p>The rdoc documentation is in the page <a href="http://recordsearch.rubyforge.org/rdoc">RecordSearch <span class="caps">API</span></a> and you can also see the <a href="http://recordsearch.rubyforge.org/coverage">test coverage report</a></p>
|
95
|
+
<h2>Source</h2>
|
96
|
+
<p>You can get the source using svn using this url <code>svn://rubyforge.org/var/svn/recordsearch/trunk</code> for anonymous access or go the the <a href="http://rubyforge.org/projects/recordsearch">RecordSearch</a> RubyForge page.</p>
|
97
|
+
<h2>License</h2>
|
98
|
+
<p>This code is free to use under the terms of the <span class="caps">GPL</span> license.</p>
|
99
|
+
<h2>Contact</h2>
|
100
|
+
<p>Comments are welcome. Send an email to <a href="mailto:lparravi@gmail.com">Luis Parravicini</a> or post a message to one of the available forums.</p>
|
160
101
|
<p class="coda">
|
161
|
-
<a href="http://ktulu.com.ar">Luis Parravicini</a>,
|
102
|
+
<a href="http://ktulu.com.ar">Luis Parravicini</a>, 1st October 2009<br>
|
162
103
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
163
104
|
</p>
|
164
105
|
</div>
|
data/website/index.txt
CHANGED
metadata
CHANGED
@@ -1,33 +1,39 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.4
|
3
|
-
specification_version: 1
|
4
2
|
name: recordsearch
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.0.0
|
7
|
-
date: 2007-11-20 00:00:00 -03:00
|
8
|
-
summary: Search on a text file using binary search
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: lparravi@gmail.com
|
12
|
-
homepage: http://recordsearch.rubyforge.org
|
13
|
-
rubyforge_project: recordsearch
|
14
|
-
description: Search on a text file using binary search
|
15
|
-
autorequire:
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 1.1.0
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- Luis Parravicini
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-01 00:00:00 -03:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hoe
|
17
|
+
type: :development
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 2.3.3
|
24
|
+
version:
|
25
|
+
description: Search on a text file using binary search
|
26
|
+
email: lparravi@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- History.txt
|
33
|
+
- License.txt
|
34
|
+
- Manifest.txt
|
35
|
+
- README.txt
|
36
|
+
- website/index.txt
|
31
37
|
files:
|
32
38
|
- History.txt
|
33
39
|
- License.txt
|
@@ -56,24 +62,36 @@ files:
|
|
56
62
|
- website/javascripts/rounded_corners_lite.inc.js
|
57
63
|
- website/stylesheets/screen.css
|
58
64
|
- website/template.rhtml
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
65
|
+
has_rdoc: true
|
66
|
+
homepage: http://recordsearch.rubyforge.org
|
67
|
+
licenses: []
|
68
|
+
|
69
|
+
post_install_message:
|
63
70
|
rdoc_options:
|
64
71
|
- --main
|
65
72
|
- README.txt
|
66
|
-
|
67
|
-
-
|
68
|
-
|
69
|
-
|
70
|
-
-
|
71
|
-
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: "0"
|
80
|
+
version:
|
81
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: "0"
|
86
|
+
version:
|
76
87
|
requirements: []
|
77
88
|
|
78
|
-
|
79
|
-
|
89
|
+
rubyforge_project: recordsearch
|
90
|
+
rubygems_version: 1.3.5
|
91
|
+
signing_key:
|
92
|
+
specification_version: 3
|
93
|
+
summary: Search on a text file using binary search
|
94
|
+
test_files:
|
95
|
+
- test/test_recordsearch.rb
|
96
|
+
- test/test_helper.rb
|
97
|
+
- test/test_index.rb
|