indexer101 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3987fe6c70a6da37dd0cbcb50bf3368601ee1b6ddc6a90da039d22dfa14f28b
4
- data.tar.gz: f7a79a0f09d97948a5576c253c8f2d2e5f58079fdc0830ee4fbeee28c208c3af
3
+ metadata.gz: c12f1a8d9fc5dcde5c95bd80e0116236f4d70b9b0f835899b8339d765f5771b8
4
+ data.tar.gz: d3aa8b7f9146bbe35a28496bc01654ffe6d638289771fde4fb62dd6aec371682
5
5
  SHA512:
6
- metadata.gz: fe696d43cc8c49962e3bb44a6f55497dd178b527d31cde3e395c2b2c1ef984add180fc503ac7f97fb2f4a716e37f785535972e149b3f6ec0cbeaa5698d64b1c9
7
- data.tar.gz: e591fdfbec8eb9e579473075dabe554c04f163ffa23548746833d27053e5a55e7ba3514b2cd154acf43e8a4d2124f5cd753dd0bfcaa30697cd04d0e487bec318
6
+ metadata.gz: ad823a882a9052f38de0acb790ee88b5dae6bd5bd19abbea22aa858a665f7d795d8cc373e4b4cc6ff0c34c0d552822484258c2203df0c25cb362f9d54ba56c78
7
+ data.tar.gz: d20146a83a706cac482fdfde143b99f4649e1b4f77e9bae9bd000120c4a82c533600777d762b75e3cefd9f36e011aba07cc13f0160b5fb599438cbca23296ce7
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/indexer101.rb CHANGED
@@ -5,24 +5,33 @@
5
5
  require 'c32'
6
6
  require 'thread'
7
7
  require 'thwait'
8
+ require 'dynarex'
8
9
 
9
10
 
10
11
  class Indexer101
11
12
  using ColouredText
12
13
 
13
14
  class Index
14
-
15
+
15
16
  attr_reader :h
16
- attr_accessor :index
17
+ attr_accessor :uri_index, :index
17
18
 
18
19
  def initialize()
20
+
21
+ @uri_index = {} # contains each URI along with the title
22
+ @index = {} # contains each keyword
23
+ @h = {} # nested keywords constructed from shared string keys
24
+
19
25
  end
20
26
 
21
27
  def build(a)
22
28
 
23
29
  threads = []
24
- threads << Thread.new do
25
- @index = Hash[a.map(&:to_sym).zip([''] * a.length)]
30
+
31
+ if @index.empty? then
32
+ threads << Thread.new do
33
+ @index = Hash[a.map(&:to_sym).zip([''] * a.length)]
34
+ end
26
35
  end
27
36
 
28
37
  threads << Thread.new { @h = group a }
@@ -70,14 +79,14 @@ class Indexer101
70
79
 
71
80
  end
72
81
 
73
- def build(a)
82
+ def build(a=@indexer.index.keys)
74
83
 
75
84
  t = Time.now
76
85
  @indexer.build(a)
77
86
  t2 = Time.now - t
78
87
 
79
88
  puts "%d words indexed".info % a.length
80
- puts "index built in %.2f seconds".info % t2
89
+ puts ("index built in " + ("%.3f" % t2).brown + " seconds").info
81
90
 
82
91
  self
83
92
  end
@@ -97,7 +106,7 @@ class Indexer101
97
106
  t2 = Time.now - t
98
107
 
99
108
  puts "index contains %d words".info % @indexer.index.length
100
- puts "index read in %.2f seconds".info % t2
109
+ puts "index read in " + ("%.2f" % t2).brown + " seconds".info
101
110
 
102
111
  end
103
112
 
@@ -108,8 +117,45 @@ class Indexer101
108
117
  end
109
118
 
110
119
  end
120
+
121
+ def scan_dxindex(*locations)
122
+
123
+ t = Time.now
124
+ threads = locations.flatten.map do |location|
125
+ Thread.new {Thread.current[:v] = Dynarex.new location}
126
+ end
127
+
128
+ ThreadsWait.all_waits(*threads)
129
+
130
+ a = threads.map {|x| x[:v]}
131
+ t2 = Time.now - t
132
+ puts ("dxindex documents loaded in " + ("%.2f" % t2).brown \
133
+ + " seconds").info
134
+
135
+ a.each.with_index do |dx, i|
136
+
137
+ @indexer.uri_index.merge! Hash[dx.all.reverse.map.with_index \
138
+ {|x,j| [(i+1)*10000 + (j+1), [x.title, x.url].join(' ')]}]
139
+
140
+ dx.all.reverse.each.with_index do |x,j|
141
+ x.title.scan(/#(\w+)/).flatten(1).each do |keyword|
142
+ @indexer.index[keyword.to_sym] ||= []
143
+ @indexer.index[keyword.to_sym] << (i+1)*10000 + (j+1)
144
+ end
145
+ end
146
+
147
+ end
148
+
149
+ end
150
+
151
+ def uri_index()
152
+ @indexer.uri_index
153
+ end
111
154
 
112
- def search(s, limit: 10)
155
+ # enter a few starting characters and lookup will suggest a few keywords
156
+ # useful for an auto suggest feature
157
+ #
158
+ def lookup(s, limit: 10)
113
159
 
114
160
  t = Time.now
115
161
  a = scan_path s
@@ -124,7 +170,39 @@ class Indexer101
124
170
 
125
171
  results = scan_leaves(r).sort_by(&:length).take(limit)
126
172
  t2 = Time.now - t
127
- puts "search took %.2f seconds" % t2 if @debug
173
+ puts ("lookup took " + ("%.3f" % t2).brown + " seconds").info
174
+
175
+ return results
176
+
177
+ end
178
+
179
+ # enter the exact keywords to search from the index
180
+ #
181
+ def search(*keywords)
182
+
183
+ t = Time.now
184
+
185
+ results = keywords.flatten(1).flat_map do |x|
186
+
187
+ a = []
188
+ a += @indexer.index[x.to_sym].reverse if @indexer.index.has_key? x.to_sym
189
+
190
+ if x.length > 3 then
191
+ a += @indexer.index.keys.reverse.grep(/^#{x}/).flat_map\
192
+ {|y| @indexer.index[y]}
193
+ a += @indexer.index.keys.reverse.grep(/#{x}/).flat_map\
194
+ {|y| @indexer.index[y]}
195
+ end
196
+
197
+ puts ('a: ' + a.inspect).debug if @debug
198
+ a.uniq.map {|y| @indexer.uri_index[y].split(/\s+(?=https?[^\s]+$)/,2) }
199
+
200
+ end
201
+
202
+ t2 = Time.now - t
203
+ puts ("found %s results" % results.length).info
204
+ puts ("search took " + ("%.3f" % t2).brown + " seconds").info
205
+ puts
128
206
 
129
207
  return results
130
208
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indexer101
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,28 +35,28 @@ cert_chain:
35
35
  08cN0E9zjqKINgH/PsZTot+ohuVRLwn6WmHHhb18oUrxt3a0u4/3TNcWOcMeR0F2
36
36
  GeYL+mKGct5bfjn8IZnAJVKY
37
37
  -----END CERTIFICATE-----
38
- date: 2019-11-11 00:00:00.000000000 Z
38
+ date: 2019-11-12 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: c32
41
+ name: dynarex
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: 0.2.0
47
44
  - - "~>"
48
45
  - !ruby/object:Gem::Version
49
- version: '0.2'
46
+ version: '1.8'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 1.8.21
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- version: 0.2.0
57
54
  - - "~>"
58
55
  - !ruby/object:Gem::Version
59
- version: '0.2'
56
+ version: '1.8'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 1.8.21
60
60
  description:
61
61
  email: james@jamesrobertson.eu
62
62
  executables: []
metadata.gz.sig CHANGED
Binary file