indexer101 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/indexer101.rb +177 -0
- metadata +91 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f3987fe6c70a6da37dd0cbcb50bf3368601ee1b6ddc6a90da039d22dfa14f28b
|
4
|
+
data.tar.gz: f7a79a0f09d97948a5576c253c8f2d2e5f58079fdc0830ee4fbeee28c208c3af
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fe696d43cc8c49962e3bb44a6f55497dd178b527d31cde3e395c2b2c1ef984add180fc503ac7f97fb2f4a716e37f785535972e149b3f6ec0cbeaa5698d64b1c9
|
7
|
+
data.tar.gz: e591fdfbec8eb9e579473075dabe554c04f163ffa23548746833d27053e5a55e7ba3514b2cd154acf43e8a4d2124f5cd753dd0bfcaa30697cd04d0e487bec318
|
checksums.yaml.gz.sig
ADDED
Binary file
|
data.tar.gz.sig
ADDED
Binary file
|
data/lib/indexer101.rb
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: indexer101.rb
|
4
|
+
|
5
|
+
require 'c32'
|
6
|
+
require 'thread'
|
7
|
+
require 'thwait'
|
8
|
+
|
9
|
+
|
10
|
+
class Indexer101
|
11
|
+
using ColouredText
|
12
|
+
|
13
|
+
# Holds the two lookup structures built from a list of words:
#
# * +index+ - a flat Hash mapping each word (as a Symbol) to an empty String
# * +h+     - a nested Hash keyed by progressively longer prefixes (as
#             Symbols) whose leaves are Arrays of words
class Index

  attr_reader :h
  attr_accessor :index

  def initialize
    @h = nil
    @index = nil
  end

  # Builds both lookup structures from +a+, an Array of Strings.
  #
  # The two structures are built in separate threads which are then joined
  # with Thread#join, so an exception raised while building either structure
  # is re-raised here instead of leaving the object half built.
  #
  # Returns nil.
  def build(a)

    workers = []

    workers << Thread.new do
      # a fresh '' per key, so the values never alias one shared String
      @index = a.each_with_object({}) { |word, flat| flat[word.to_sym] = '' }
    end

    workers << Thread.new { @h = group(a) }

    workers.each(&:join)
    nil

  end

  # Short, truncated representation (first 31 characters of @h.inspect).
  def inspect
    preview = @h ? @h.inspect[0..30] + "..." : nil
    "#<Indexer101::Index @h=#{preview.inspect}>"
  end

  private

  # Recursively groups the words in +a+ by their first (length + 1)
  # characters and returns a Hash with Symbol keys. A group is replaced by a
  # deeper sub-tree unless that sub-tree has a single key and the group
  # holds a single word.
  def group(a, length=0)

    buckets = a.group_by { |word| word[0..length] }

    buckets.each do |prefix, words|

      # NOTE(review): Array#max on Strings is lexicographic, so the depth
      # check uses the alphabetically last word, not the longest one.
      # Behaviour kept as-is; confirm whether the longest word was intended.
      next unless length + 1 < words.max.length - 1

      subtree = group(words, length + 1)
      buckets[prefix] = subtree unless subtree.length < 2 && words.length < 2

    end

    buckets.each_with_object({}) do |(prefix, node), tree|
      tree[prefix.to_sym] = node
    end

  end

end
|
60
|
+
|
61
|
+
# Remembers the default index file name and the debug flag, prints a
# "ready" banner surrounded by blank lines, and creates an empty Index.
#
# filename - default path used by #read and #save
# debug    - when true, other methods print diagnostic output
def initialize(filename='indexer.dat', debug: false)

  @filename = filename
  @debug = debug

  puts
  puts 'Indexer101'.highlight + " ready to index".green
  puts

  @indexer = Index.new

end
|
72
|
+
|
73
|
+
# Builds the index from +a+ (an Array of words) and prints the word count
# and the time taken.
#
# The elapsed time is measured with the monotonic clock so that it cannot
# be skewed by wall-clock adjustments made while the build is running.
#
# Returns self so that calls can be chained.
def build(a)

  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  @indexer.build(a)
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started

  puts "%d words indexed".info % a.length
  puts "index built in %.2f seconds".info % elapsed

  self

end
|
84
|
+
|
85
|
+
# Returns the flat word index held by the underlying Index object.
def index
  @indexer.index
end
|
88
|
+
|
89
|
+
# Loads a previously saved index from +filename+ (defaults to the file name
# given to the constructor), replacing the current one, then prints the
# number of words and the time taken.
#
# The file is opened in binary mode because Marshal data is binary.
#
# SECURITY: Marshal.load can instantiate arbitrary objects; only read index
# files that come from a trusted source.
def read(filename=@filename)

  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  @indexer = File.open(filename, 'rb') { |f| Marshal.load(f) }

  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started

  puts "index contains %d words".info % @indexer.index.length
  puts "index read in %.2f seconds".info % elapsed

end
|
103
|
+
|
104
|
+
# Serialises the current index to +filename+ (defaults to the file name
# given to the constructor) using Marshal.
#
# The file is opened in binary mode: Marshal output is binary, and text
# mode may translate newline bytes on some platforms and corrupt the dump.
def save(filename=@filename)

  File.open(filename, 'wb') do |f|
    Marshal.dump(@indexer, f)
  end

end
|
111
|
+
|
112
|
+
# Returns up to +limit+ indexed words that start with +s+, shortest first.
#
# The prefix tree is walked one character at a time for as long as the
# next, longer prefix of +s+ exists as a key. The words below the node that
# was reached are then filtered by +s+, because a leaf Array may also hold
# words which only share a shorter prefix with the query.
#
# Returns an empty Array when nothing matches, when +s+ is empty, or when
# no index has been built or read yet.
def search(s, limit: 10)

  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  tree = @indexer.h
  return [] if tree.nil? || s.empty?

  path = scan_path s
  puts(('a: ' + path.inspect[0..100] + '...').debug) if @debug

  depth = scan_key tree, path
  return [] if depth.nil?

  node = tree.dig(*path[0..depth])
  puts(('r: ' + node.inspect[0..100] + '...').debug) if @debug

  candidates = node.is_a?(Array) ? node : scan_leaves(node)
  matches = candidates.select { |word| word.start_with?(s) }
  results = matches.sort_by(&:length).take(limit)

  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  puts "search took %.2f seconds" % elapsed if @debug

  results

end

private

# Walks +h+ along +keys+ starting at position +index+ and returns the
# position of the deepest key that is present, or nil when keys[index] is
# not a key of +h+.
def scan_key(h, keys, index=0)

  return nil unless h.is_a?(Hash) && h.key?(keys[index])

  node = h[keys[index]]
  puts(('r: ' + node.inspect[0..100] + '...').debug) if @debug

  # Hash#key? is used so that a missing prefix simply ends the walk
  while node.is_a?(Hash) && index + 1 < keys.length &&
        node.key?(keys[index + 1])

    index += 1
    node = node[keys[index]]
    puts(('r: ' + node.inspect[0..100] + '...').debug) if @debug

  end

  index

end

# Collects every word stored in the leaf Arrays below +h+ into one flat
# Array, in the order the keys are stored.
def scan_leaves(h)

  h.flat_map do |_key, value|
    value.is_a?(Array) ? value : scan_leaves(value)
  end

end

# Returns the prefixes of +s+ as Symbols, starting with the first
# (length + 1) characters and growing by one character up to the whole
# string, e.g. 'abc' => [:a, :ab, :abc].
def scan_path(s, length=0)

  puts 'inside scan_path'.info if @debug

  return [s[0..length].to_sym] unless length < s.length - 1

  (length...s.length).map { |last| s[0..last].to_sym }

end
|
176
|
+
|
177
|
+
end
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: indexer101
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkxMTExMjI1NjA5WhcN
|
15
|
+
MjAxMTEwMjI1NjA5WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDEF92R
|
17
|
+
JWEWjPhgU6nIiCKKXtWI9EE8DbqPtU+CnYuUN2BWN7c4dsbaYrU1tVMzxx22G+Of
|
18
|
+
apTIeIGFrq/oqub6nhx+UbnkMAqpmbJS8zTgnwEhFsGHGI2CD9+4OXh5rl36SqqP
|
19
|
+
IGxY7w45KDvuPWA/Htb2aC20cHclJebOjeaMNogpYDByVEjjxtZeiOmIJtJlQSf+
|
20
|
+
YpUviQVTkFJqbSK0WkKsPLQZu29P1zHETkdBvIGlGGDdo13haBW2Hdj9a2INeWcz
|
21
|
+
B+v6nAdnv9fTTvH0GX51XDa/EIisWNELaopHk1Hcx97pZdm92gaICQfdgUHje65s
|
22
|
+
oUDjyynUKE8dq+LAPEq5B1wj3e4BsIOFI5PSvPCMWI5jpbJWBXV6owex9qln22UA
|
23
|
+
lDUEP3COb9/+r6bGNiCBUvUwyy7l/RdmRXPSOYnP8jPcwD/qSVypJYObM+2q68qc
|
24
|
+
5Eg0WqCVdFgpIXzBJPVcxeDJiN6EKmFWr5vJkMwIGz9hhIcitQjjSguk4r8CAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUN0C2lCCv
|
26
|
+
/ATnmYSeZRQfUNpplfwwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAXrKEIca0q3x//SBOsv17jkonBSlzwVLIBuXKXX4R
|
29
|
+
f0q3kJw7vWBPJfIVpAcGn6mbx2ziWw3XvG/SMuwIfzitz0pKTapvwbKZFPscqy82
|
30
|
+
KcDxGtJt1cjDHzl0Bm+mgN/MgY+PAj3TcT/osuCc8iTu4+Ib6UxkmOP/uy2svBLs
|
31
|
+
Su2XzGoqd5SKxPpj7IwMOBSVQWrqgWN2B+gdkN0CqjUPVMmiEKuMNjz9Idu36nG/
|
32
|
+
QOPsTlpqBxR+yFbPEP0DlN5X8BRRAsraQZ+LPi7W/bU1fjkvIJxXuUkQD9dMF99+
|
33
|
+
me+6s7PoCJ1yMmc5XfMFmv4WYswC+VqI1EeG0EneAsxRo8MmhWZcokqRE/KUk+ym
|
34
|
+
NlDFqcKPZDaMsdO5dkYvEeguet/iG3XS1u7WKAcZgfhPdiIbue7cAhz9eQpNybe7
|
35
|
+
08cN0E9zjqKINgH/PsZTot+ohuVRLwn6WmHHhb18oUrxt3a0u4/3TNcWOcMeR0F2
|
36
|
+
GeYL+mKGct5bfjn8IZnAJVKY
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2019-11-11 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: c32
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 0.2.0
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0.2'
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 0.2.0
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0.2'
|
60
|
+
description:
|
61
|
+
email: james@jamesrobertson.eu
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- lib/indexer101.rb
|
67
|
+
homepage: https://github.com/jrobertson/indexer101
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubygems_version: 3.0.3
|
87
|
+
signing_key:
|
88
|
+
specification_version: 4
|
89
|
+
summary: Experimental gem to search a list of words 1 character at a time. Intended
|
90
|
+
for use as auto suggestion.
|
91
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|