indexer101 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f3987fe6c70a6da37dd0cbcb50bf3368601ee1b6ddc6a90da039d22dfa14f28b
4
+ data.tar.gz: f7a79a0f09d97948a5576c253c8f2d2e5f58079fdc0830ee4fbeee28c208c3af
5
+ SHA512:
6
+ metadata.gz: fe696d43cc8c49962e3bb44a6f55497dd178b527d31cde3e395c2b2c1ef984add180fc503ac7f97fb2f4a716e37f785535972e149b3f6ec0cbeaa5698d64b1c9
7
+ data.tar.gz: e591fdfbec8eb9e579473075dabe554c04f163ffa23548746833d27053e5a55e7ba3514b2cd154acf43e8a4d2124f5cd753dd0bfcaa30697cd04d0e487bec318
Binary file
Binary file
@@ -0,0 +1,177 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: indexer101.rb
4
+
5
+ require 'c32'
6
+ require 'thread'
7
+ require 'thwait'
8
+
9
+
10
# Builds, saves, loads and searches a prefix index over a list of words so
# that matches can be looked up one character at a time (the gem summary
# describes the intended use as auto suggestion).
#
# Console output relies on the String helpers (highlight, green, info,
# debug) made available by the ColouredText refinement from the c32 gem.
class Indexer101
  using ColouredText

  # Container for the two structures derived from the word list:
  #
  # index - Hash mapping every word (as a Symbol) to an empty String.
  # h     - nested Hash keyed by ever longer prefixes (as Symbols); the
  #         innermost values are Arrays of the words sharing that prefix.
  #
  # Instances are what Indexer101#save / Indexer101#read marshal to disk, so
  # the instance variable names (@index, @h) must stay stable.
  class Index

    attr_reader :h
    attr_accessor :index

    def initialize()
    end

    # Builds both structures from +a+, an Array of Strings.
    #
    # Returns self once both structures are ready.
    def build(a)

      threads = []

      threads << Thread.new do
        # every word gets its own '' value rather than one shared String
        @index = a.each_with_object({}) {|word, r| r[word.to_sym] = ''}
      end

      threads << Thread.new { @h = group a }

      # Thread#join is used instead of ThreadsWait.all_waits: the thwait
      # library is no longer shipped with Ruby (2.7 onwards), and join also
      # re-raises any exception raised inside a worker thread instead of
      # silently leaving @index or @h unset.
      threads.each(&:join)

      self
    end

    # Short representation; the prefix tree is truncated to its first 31
    # characters because it can be very large.
    def inspect()
      h = @h ? @h.inspect[0..30] + "..." : nil
      "#<Indexer101::Index @h=#{h.inspect}>"
    end

    private

    # Recursively groups the words in +a+ by their first length+1 characters.
    #
    # a      - Array of Strings.
    # length - zero-based index of the last character of the prefix used as
    #          the grouping key at this level.
    #
    # Returns a Hash of prefix Symbol => nested Hash or Array of words.
    def group(a, length=0)

      a.group_by {|x| x[0..length]}.each_with_object({}) do |(key, value), r|

        # Decide on the longest word in the bucket. The previous value.max
        # picked the lexicographically greatest word, which could stop the
        # subdivision early for buckets that also held longer words.
        longest = value.map(&:length).max

        node = value

        if length + 1 < longest - 1 then
          h2 = group value, length + 1
          # a bucket holding one word which yields one sub-bucket stays a
          # plain Array; anything else is replaced by the deeper level
          node = h2 if h2.length > 1 || value.length > 1
        end

        r[key.to_sym] = node

      end

    end

  end

  # filename - default path used by #read and #save.
  # debug    - when true, intermediate search state is printed.
  def initialize(filename='indexer.dat', debug: false)

    @filename, @debug = filename, debug

    puts
    puts 'Indexer101'.highlight + " ready to index".green
    puts

    @indexer = Index.new()

  end

  # Builds the index from +a+ (an Array of Strings) and prints how many
  # words were indexed and how long it took.
  #
  # Returns self.
  def build(a)

    t = Time.now
    @indexer.build(a)
    t2 = Time.now - t

    puts "%d words indexed".info % a.length
    puts "index built in %.2f seconds".info % t2

    self
  end

  # Returns the word lookup Hash (word Symbol => '') of the current index.
  def index()
    @indexer.index
  end

  # Loads a previously saved index from +filename+ and prints its size and
  # the time taken.
  def read(filename=@filename)

    t = Time.now

    # Marshal data is binary, hence 'rb' (text mode can corrupt it on
    # platforms which translate line endings).
    # NOTE(review): Marshal.load can instantiate arbitrary objects; only
    # read index files which come from a trusted source.
    File.open(filename, 'rb') do |f|
      @indexer = Marshal.load(f)
    end

    t2 = Time.now - t

    puts "index contains %d words".info % @indexer.index.length
    puts "index read in %.2f seconds".info % t2

  end

  # Writes the current index to +filename+ using Marshal (binary mode).
  def save(filename=@filename)

    File.open(filename, 'wb') do |f|
      Marshal.dump(@indexer, f)
    end

  end

  # Looks up the words stored under the deepest indexed prefix of +s+.
  #
  # s     - the text typed so far.
  # limit - maximum number of words returned.
  #
  # Returns a new Array of at most +limit+ words, shortest first. An empty
  # Array is returned when no index has been built or when not even the
  # first character of +s+ is indexed (previously this raised KeyError or
  # NoMethodError). The limit now also applies when the lookup ends on a
  # leaf Array, which used to be handed back whole.
  def search(s, limit: 10)

    t = Time.now
    a = scan_path s
    puts(('a: ' + a.inspect[0..100] + '...').debug) if @debug

    tree = @indexer.h
    i = tree ? scan_key(tree, a) : nil

    return [] if i.nil?

    r = tree.dig(*a[0..i])
    puts(('r: ' + r.inspect[0..100] + '...').debug) if @debug

    words = r.is_a?(Array) ? r : scan_leaves(r)

    # sort_by returns a new Array, so callers never receive (and cannot
    # mutate) an Array which belongs to the index itself
    results = words.sort_by(&:length).take(limit)

    t2 = Time.now - t
    puts "search took %.2f seconds" % t2 if @debug

    results

  end

  private

  # Walks +h+ along +keys+, starting at keys[index].
  #
  # Returns the position within +keys+ of the deepest key that exists in
  # the tree, or nil when keys[index] itself is missing from +h+.
  def scan_key(h, keys, index=0)

    # key? instead of fetch: a missing key means "stop here" rather than
    # an exception
    return nil unless h.key?(keys[index])

    r = h[keys[index]]

    puts(('r: ' + r.inspect[0..100] + '...').debug) if @debug

    if r.is_a?(Hash) && index + 1 < keys.length && r.key?(keys[index + 1]) then
      scan_key r, keys, index + 1
    else
      index
    end

  end

  # Collects every word stored beneath the node +h+ of the prefix tree.
  #
  # Returns a flat Array of words.
  def scan_leaves(h)

    h.flat_map do |_key, value|
      value.is_a?(Array) ? value : scan_leaves(value)
    end

  end

  # Expands +s+ into its prefixes as Symbols, e.g. 'abc' gives
  # [:a, :ab, :abc]. +length+ is the zero-based index of the last character
  # of the first prefix produced. An empty String gives [:""].
  def scan_path(s, length=0)

    puts 'inside scan_path'.info if @debug

    # always produce at least the prefix ending at +length+
    last = [s.length - 1, length].max

    (length..last).map {|i| s[0..i].to_sym}

  end

end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: indexer101
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkxMTExMjI1NjA5WhcN
15
+ MjAxMTEwMjI1NjA5WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDEF92R
17
+ JWEWjPhgU6nIiCKKXtWI9EE8DbqPtU+CnYuUN2BWN7c4dsbaYrU1tVMzxx22G+Of
18
+ apTIeIGFrq/oqub6nhx+UbnkMAqpmbJS8zTgnwEhFsGHGI2CD9+4OXh5rl36SqqP
19
+ IGxY7w45KDvuPWA/Htb2aC20cHclJebOjeaMNogpYDByVEjjxtZeiOmIJtJlQSf+
20
+ YpUviQVTkFJqbSK0WkKsPLQZu29P1zHETkdBvIGlGGDdo13haBW2Hdj9a2INeWcz
21
+ B+v6nAdnv9fTTvH0GX51XDa/EIisWNELaopHk1Hcx97pZdm92gaICQfdgUHje65s
22
+ oUDjyynUKE8dq+LAPEq5B1wj3e4BsIOFI5PSvPCMWI5jpbJWBXV6owex9qln22UA
23
+ lDUEP3COb9/+r6bGNiCBUvUwyy7l/RdmRXPSOYnP8jPcwD/qSVypJYObM+2q68qc
24
+ 5Eg0WqCVdFgpIXzBJPVcxeDJiN6EKmFWr5vJkMwIGz9hhIcitQjjSguk4r8CAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUN0C2lCCv
26
+ /ATnmYSeZRQfUNpplfwwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAXrKEIca0q3x//SBOsv17jkonBSlzwVLIBuXKXX4R
29
+ f0q3kJw7vWBPJfIVpAcGn6mbx2ziWw3XvG/SMuwIfzitz0pKTapvwbKZFPscqy82
30
+ KcDxGtJt1cjDHzl0Bm+mgN/MgY+PAj3TcT/osuCc8iTu4+Ib6UxkmOP/uy2svBLs
31
+ Su2XzGoqd5SKxPpj7IwMOBSVQWrqgWN2B+gdkN0CqjUPVMmiEKuMNjz9Idu36nG/
32
+ QOPsTlpqBxR+yFbPEP0DlN5X8BRRAsraQZ+LPi7W/bU1fjkvIJxXuUkQD9dMF99+
33
+ me+6s7PoCJ1yMmc5XfMFmv4WYswC+VqI1EeG0EneAsxRo8MmhWZcokqRE/KUk+ym
34
+ NlDFqcKPZDaMsdO5dkYvEeguet/iG3XS1u7WKAcZgfhPdiIbue7cAhz9eQpNybe7
35
+ 08cN0E9zjqKINgH/PsZTot+ohuVRLwn6WmHHhb18oUrxt3a0u4/3TNcWOcMeR0F2
36
+ GeYL+mKGct5bfjn8IZnAJVKY
37
+ -----END CERTIFICATE-----
38
+ date: 2019-11-11 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: c32
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 0.2.0
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '0.2'
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 0.2.0
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '0.2'
60
+ description:
61
+ email: james@jamesrobertson.eu
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - lib/indexer101.rb
67
+ homepage: https://github.com/jrobertson/indexer101
68
+ licenses:
69
+ - MIT
70
+ metadata: {}
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.0.3
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: Experimental gem to search a list of words 1 character at a time. Intended
90
+ for use as auto suggestion.
91
+ test_files: []
Binary file