indexer101 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/indexer101.rb +177 -0
- metadata +91 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f3987fe6c70a6da37dd0cbcb50bf3368601ee1b6ddc6a90da039d22dfa14f28b
|
4
|
+
data.tar.gz: f7a79a0f09d97948a5576c253c8f2d2e5f58079fdc0830ee4fbeee28c208c3af
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fe696d43cc8c49962e3bb44a6f55497dd178b527d31cde3e395c2b2c1ef984add180fc503ac7f97fb2f4a716e37f785535972e149b3f6ec0cbeaa5698d64b1c9
|
7
|
+
data.tar.gz: e591fdfbec8eb9e579473075dabe554c04f163ffa23548746833d27053e5a55e7ba3514b2cd154acf43e8a4d2124f5cd753dd0bfcaa30697cd04d0e487bec318
|
checksums.yaml.gz.sig
ADDED
Binary file
|
data.tar.gz.sig
ADDED
Binary file
|
data/lib/indexer101.rb
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: indexer101.rb
|
4
|
+
|
5
|
+
require 'c32'
|
6
|
+
require 'thread'
|
7
|
+
require 'thwait'
|
8
|
+
|
9
|
+
|
10
|
+
class Indexer101
|
11
|
+
using ColouredText
|
12
|
+
|
13
|
+
# Holds the two lookup structures derived from a word list: a flat hash
# keyed by every word (+index+) and a nested prefix tree (+h+).
class Index

  # Nested prefix tree: Symbol prefix => Array of words, or a deeper Hash of
  # the same shape. nil until #build has run.
  attr_reader :h

  # Flat lookup: Symbol word => '' placeholder. nil until #build has run.
  attr_accessor :index

  def initialize()
  end

  # Builds both lookup structures from the word list.
  #
  # a - Array of String words.
  #
  # Returns nil. An exception raised while building either structure is
  # re-raised here by Thread#join.
  def build(a)

    workers = [
      Thread.new do
        # ''.dup gives each word its own placeholder String; a single
        # shared object would make a mutation of one value appear on all.
        @index = a.each_with_object({}) do |word, flat|
          flat[word.to_sym] = ''.dup
        end
      end,
      Thread.new { @h = group a }
    ]

    # Thread#join is used rather than ThreadsWait, which was removed from
    # Ruby's standard library in 2.7.
    workers.each(&:join)
    nil

  end

  # Short representation; the tree's own inspect output is truncated to its
  # first 31 characters so large indexes stay readable.
  def inspect()
    h = @h ? @h.inspect[0..30] + "..." : nil
    "#<Indexer101::Index @h=#{h.inspect}>"
  end

  private

  # Recursively buckets words by their leading characters.
  #
  # a      - Array of String words.
  # length - zero-based index of the last character of the prefix used at
  #          this level (0 buckets by the first character).
  #
  # Returns a Hash of Symbol prefix => Array of words, or a nested Hash when
  # the bucket was subdivided.
  def group(a, length=0)

    buckets = a.group_by { |word| word[0..length] }

    buckets.each_with_object({}) do |(prefix, words), tree|

      subtree = words

      # Decide on the *longest* word in the bucket. String#max would pick
      # the lexicographically greatest word instead, so a short word that
      # sorts last would stop the whole bucket from being subdivided.
      if length + 1 < words.map(&:length).max - 1
        deeper = group words, length + 1
        # A lone word whose deeper level is also a single bucket stays a
        # plain Array rather than becoming a chain of one-key hashes.
        subtree = deeper if deeper.length > 1 || words.length > 1
      end

      tree[prefix.to_sym] = subtree

    end

  end

end
|
60
|
+
|
61
|
+
# Prepares an indexer with an empty Index and prints a ready banner.
#
# filename - default path used by #read and #save.
# debug    - when truthy, #search and its helpers print diagnostics.
def initialize(filename='indexer.dat', debug: false)

  @filename = filename
  @debug = debug

  puts
  puts 'Indexer101'.highlight + " ready to index".green
  puts

  @indexer = Index.new

end
|
72
|
+
|
73
|
+
# Indexes the given words and reports the word count and the time taken.
#
# a - Array of String words, passed straight to the underlying Index.
#
# Returns self so calls can be chained.
def build(a)

  started_at = Time.now
  @indexer.build(a)
  elapsed = Time.now - started_at

  puts format("%d words indexed".info, a.length)
  puts format("index built in %.2f seconds".info, elapsed)

  self

end
|
84
|
+
|
85
|
+
# Returns the flat word lookup held by the underlying Index object.
def index
  @indexer.index
end
|
88
|
+
|
89
|
+
# Loads a previously saved index, replacing the current one, and reports
# its size and the time taken.
#
# filename - path of the file written by #save (defaults to the filename
#            given to the constructor).
#
# Returns nil.
#
# SECURITY: Marshal.load can instantiate arbitrary objects. Only read index
# files that this program wrote itself; never load untrusted input.
def read(filename=@filename)

  t = Time.now

  # Marshal output is binary, so the file is opened in binary mode to stop
  # newline translation and text-mode EOF handling from corrupting it.
  File.open(filename, 'rb') do |f|
    @indexer = Marshal.load(f)
  end

  t2 = Time.now - t

  puts "index contains %d words".info % @indexer.index.length
  puts "index read in %.2f seconds".info % t2

end

# Serialises the current index to disk with Marshal.
#
# filename - destination path (defaults to the filename given to the
#            constructor). An existing file is truncated.
#
# Returns whatever Marshal.dump returns for the opened file.
def save(filename=@filename)

  # Binary mode, matching #read, so the bytes on disk are exactly the
  # Marshal stream on every platform.
  File.open(filename, 'wb') do |f|
    Marshal.dump(@indexer, f)
  end

end
|
111
|
+
|
112
|
+
# Suggests indexed words for the typed text.
#
# The text is expanded into its successive prefixes, the prefix tree is
# walked as deep as those prefixes exist, and the words found beneath the
# deepest matching node are returned.
#
# s     - String typed so far.
# limit - maximum number of suggestions to return.
#
# Returns a new Array of at most +limit+ words, shortest first, or an empty
# Array when nothing in the index matches.
def search(s, limit: 10)

  t = Time.now
  a = scan_path s
  puts(('a: ' + a.inspect[0..100] + '...').debug) if @debug

  begin
    i = scan_key @indexer.h, a
  rescue KeyError
    # The tree lookup raises when a prefix is absent; for a suggestion
    # search that simply means there is nothing to suggest.
    return []
  end

  r = @indexer.h.dig(*a[0..i])
  puts(('r: ' + r.inspect[0..100] + '...').debug) if @debug

  # A leaf Array takes the same ordering and limit as a subtree, so +limit+
  # is always honoured and the index's own array is never handed out.
  words = r.is_a?(Array) ? r : scan_leaves(r)
  results = words.sort_by(&:length).take(limit)

  t2 = Time.now - t
  puts "search took %.2f seconds" % t2 if @debug

  results

end
|
132
|
+
|
133
|
+
private
|
134
|
+
|
135
|
+
# Finds how far down the prefix tree the given key path can be followed.
#
# h     - Hash node to start from.
# keys  - Array of Symbol prefixes, shortest first.
# index - position in +keys+ to look up in +h+.
#
# Returns the Integer position of the deepest key in +keys+ that exists
# along the path.
# Raises KeyError when keys[index] itself is missing from +h+.
def scan_key(h, keys, index=0)

  node = h.fetch keys[index]

  puts(('r: ' + node.inspect[0..100] + '...').debug) if @debug

  following = index + 1

  # Test for the next key with key? rather than fetch: fetch raises on a
  # miss, so the "stop here" branch could never be taken for a deeper
  # prefix that is not in the tree.
  if node.is_a?(Hash) && following < keys.length && node.key?(keys[following])
    scan_key node, keys, following
  else
    index
  end

end
|
148
|
+
|
149
|
+
# Collects every word stored beneath a prefix-tree node.
#
# h - Hash (or any enumerable of key/value pairs) whose values are either
#     Arrays of words or nested nodes of the same shape.
#
# Returns a new flat Array of the words, in traversal order.
def scan_leaves(h)

  # flat_map appends each branch's words once, avoiding the repeated array
  # copies that accumulating with += would make.
  h.flat_map do |_prefix, value|
    value.is_a?(Array) ? value : scan_leaves(value)
  end

end
|
163
|
+
|
164
|
+
# Expands a string into its successive prefixes, as Symbols.
#
# s      - String to expand.
# length - zero-based index of the last character of the first prefix.
#
# Returns an Array of Symbols, e.g. 'abc' => [:a, :ab, :abc]. At least one
# prefix is always produced, even when +length+ is past the end of +s+.
def scan_path(s, length=0)

  # Never stop before +length+ itself, so one prefix is always emitted.
  final = [s.length - 1, length].max

  (length..final).map do |last|
    puts 'inside scan_path'.info if @debug
    s[0..last].to_sym
  end

end
|
176
|
+
|
177
|
+
end
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: indexer101
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkxMTExMjI1NjA5WhcN
|
15
|
+
MjAxMTEwMjI1NjA5WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDEF92R
|
17
|
+
JWEWjPhgU6nIiCKKXtWI9EE8DbqPtU+CnYuUN2BWN7c4dsbaYrU1tVMzxx22G+Of
|
18
|
+
apTIeIGFrq/oqub6nhx+UbnkMAqpmbJS8zTgnwEhFsGHGI2CD9+4OXh5rl36SqqP
|
19
|
+
IGxY7w45KDvuPWA/Htb2aC20cHclJebOjeaMNogpYDByVEjjxtZeiOmIJtJlQSf+
|
20
|
+
YpUviQVTkFJqbSK0WkKsPLQZu29P1zHETkdBvIGlGGDdo13haBW2Hdj9a2INeWcz
|
21
|
+
B+v6nAdnv9fTTvH0GX51XDa/EIisWNELaopHk1Hcx97pZdm92gaICQfdgUHje65s
|
22
|
+
oUDjyynUKE8dq+LAPEq5B1wj3e4BsIOFI5PSvPCMWI5jpbJWBXV6owex9qln22UA
|
23
|
+
lDUEP3COb9/+r6bGNiCBUvUwyy7l/RdmRXPSOYnP8jPcwD/qSVypJYObM+2q68qc
|
24
|
+
5Eg0WqCVdFgpIXzBJPVcxeDJiN6EKmFWr5vJkMwIGz9hhIcitQjjSguk4r8CAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUN0C2lCCv
|
26
|
+
/ATnmYSeZRQfUNpplfwwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAXrKEIca0q3x//SBOsv17jkonBSlzwVLIBuXKXX4R
|
29
|
+
f0q3kJw7vWBPJfIVpAcGn6mbx2ziWw3XvG/SMuwIfzitz0pKTapvwbKZFPscqy82
|
30
|
+
KcDxGtJt1cjDHzl0Bm+mgN/MgY+PAj3TcT/osuCc8iTu4+Ib6UxkmOP/uy2svBLs
|
31
|
+
Su2XzGoqd5SKxPpj7IwMOBSVQWrqgWN2B+gdkN0CqjUPVMmiEKuMNjz9Idu36nG/
|
32
|
+
QOPsTlpqBxR+yFbPEP0DlN5X8BRRAsraQZ+LPi7W/bU1fjkvIJxXuUkQD9dMF99+
|
33
|
+
me+6s7PoCJ1yMmc5XfMFmv4WYswC+VqI1EeG0EneAsxRo8MmhWZcokqRE/KUk+ym
|
34
|
+
NlDFqcKPZDaMsdO5dkYvEeguet/iG3XS1u7WKAcZgfhPdiIbue7cAhz9eQpNybe7
|
35
|
+
08cN0E9zjqKINgH/PsZTot+ohuVRLwn6WmHHhb18oUrxt3a0u4/3TNcWOcMeR0F2
|
36
|
+
GeYL+mKGct5bfjn8IZnAJVKY
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2019-11-11 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: c32
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 0.2.0
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0.2'
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 0.2.0
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0.2'
|
60
|
+
description:
|
61
|
+
email: james@jamesrobertson.eu
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- lib/indexer101.rb
|
67
|
+
homepage: https://github.com/jrobertson/indexer101
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubygems_version: 3.0.3
|
87
|
+
signing_key:
|
88
|
+
specification_version: 4
|
89
|
+
summary: Experimental gem to search a list of words 1 character at a time. Intended
|
90
|
+
for use as auto suggestion.
|
91
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|