indexer101 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3987fe6c70a6da37dd0cbcb50bf3368601ee1b6ddc6a90da039d22dfa14f28b
4
- data.tar.gz: f7a79a0f09d97948a5576c253c8f2d2e5f58079fdc0830ee4fbeee28c208c3af
3
+ metadata.gz: c12f1a8d9fc5dcde5c95bd80e0116236f4d70b9b0f835899b8339d765f5771b8
4
+ data.tar.gz: d3aa8b7f9146bbe35a28496bc01654ffe6d638289771fde4fb62dd6aec371682
5
5
  SHA512:
6
- metadata.gz: fe696d43cc8c49962e3bb44a6f55497dd178b527d31cde3e395c2b2c1ef984add180fc503ac7f97fb2f4a716e37f785535972e149b3f6ec0cbeaa5698d64b1c9
7
- data.tar.gz: e591fdfbec8eb9e579473075dabe554c04f163ffa23548746833d27053e5a55e7ba3514b2cd154acf43e8a4d2124f5cd753dd0bfcaa30697cd04d0e487bec318
6
+ metadata.gz: ad823a882a9052f38de0acb790ee88b5dae6bd5bd19abbea22aa858a665f7d795d8cc373e4b4cc6ff0c34c0d552822484258c2203df0c25cb362f9d54ba56c78
7
+ data.tar.gz: d20146a83a706cac482fdfde143b99f4649e1b4f77e9bae9bd000120c4a82c533600777d762b75e3cefd9f36e011aba07cc13f0160b5fb599438cbca23296ce7
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/indexer101.rb CHANGED
@@ -5,24 +5,33 @@
5
5
  require 'c32'
6
6
  require 'thread'
7
7
  require 'thwait'
8
+ require 'dynarex'
8
9
 
9
10
 
10
11
  class Indexer101
11
12
  using ColouredText
12
13
 
13
14
  class Index
14
-
15
+
15
16
  attr_reader :h
16
- attr_accessor :index
17
+ attr_accessor :uri_index, :index
17
18
 
18
19
  def initialize()
20
+
21
+ @uri_index = {} # contains each URI along with the title
22
+ @index = {} # contains each keyword
23
+ @h = {} # nested keywords constructed from shared string keys
24
+
19
25
  end
20
26
 
21
27
  def build(a)
22
28
 
23
29
  threads = []
24
- threads << Thread.new do
25
- @index = Hash[a.map(&:to_sym).zip([''] * a.length)]
30
+
31
+ if @index.empty? then
32
+ threads << Thread.new do
33
+ @index = Hash[a.map(&:to_sym).zip([''] * a.length)]
34
+ end
26
35
  end
27
36
 
28
37
  threads << Thread.new { @h = group a }
@@ -70,14 +79,14 @@ class Indexer101
70
79
 
71
80
  end
72
81
 
73
- def build(a)
82
+ def build(a=@indexer.index.keys)
74
83
 
75
84
  t = Time.now
76
85
  @indexer.build(a)
77
86
  t2 = Time.now - t
78
87
 
79
88
  puts "%d words indexed".info % a.length
80
- puts "index built in %.2f seconds".info % t2
89
+ puts ("index built in " + ("%.3f" % t2).brown + " seconds").info
81
90
 
82
91
  self
83
92
  end
@@ -97,7 +106,7 @@ class Indexer101
97
106
  t2 = Time.now - t
98
107
 
99
108
  puts "index contains %d words".info % @indexer.index.length
100
- puts "index read in %.2f seconds".info % t2
109
+ puts "index read in " + ("%.2f" % t2).brown + " seconds".info
101
110
 
102
111
  end
103
112
 
@@ -108,8 +117,45 @@ class Indexer101
108
117
  end
109
118
 
110
119
  end
120
+
121
+ def scan_dxindex(*locations)
122
+
123
+ t = Time.now
124
+ threads = locations.flatten.map do |location|
125
+ Thread.new {Thread.current[:v] = Dynarex.new location}
126
+ end
127
+
128
+ ThreadsWait.all_waits(*threads)
129
+
130
+ a = threads.map {|x| x[:v]}
131
+ t2 = Time.now - t
132
+ puts ("dxindex documents loaded in " + ("%.2f" % t2).brown \
133
+ + " seconds").info
134
+
135
+ a.each.with_index do |dx, i|
136
+
137
+ @indexer.uri_index.merge! Hash[dx.all.reverse.map.with_index \
138
+ {|x,j| [(i+1)*10000 + (j+1), [x.title, x.url].join(' ')]}]
139
+
140
+ dx.all.reverse.each.with_index do |x,j|
141
+ x.title.scan(/#(\w+)/).flatten(1).each do |keyword|
142
+ @indexer.index[keyword.to_sym] ||= []
143
+ @indexer.index[keyword.to_sym] << (i+1)*10000 + (j+1)
144
+ end
145
+ end
146
+
147
+ end
148
+
149
+ end
150
+
151
+ def uri_index()
152
+ @indexer.uri_index
153
+ end
111
154
 
112
- def search(s, limit: 10)
155
+ # enter a few starting characters and lookup will suggest a few keywords
156
+ # useful for an auto suggest feature
157
+ #
158
+ def lookup(s, limit: 10)
113
159
 
114
160
  t = Time.now
115
161
  a = scan_path s
@@ -124,7 +170,39 @@ class Indexer101
124
170
 
125
171
  results = scan_leaves(r).sort_by(&:length).take(limit)
126
172
  t2 = Time.now - t
127
- puts "search took %.2f seconds" % t2 if @debug
173
+ puts ("lookup took " + ("%.3f" % t2).brown + " seconds").info
174
+
175
+ return results
176
+
177
+ end
178
+
179
+ # enter the exact keywords to search from the index
180
+ #
181
+ def search(*keywords)
182
+
183
+ t = Time.now
184
+
185
+ results = keywords.flatten(1).flat_map do |x|
186
+
187
+ a = []
188
+ a += @indexer.index[x.to_sym].reverse if @indexer.index.has_key? x.to_sym
189
+
190
+ if x.length > 3 then
191
+ a += @indexer.index.keys.reverse.grep(/^#{x}/).flat_map\
192
+ {|y| @indexer.index[y]}
193
+ a += @indexer.index.keys.reverse.grep(/#{x}/).flat_map\
194
+ {|y| @indexer.index[y]}
195
+ end
196
+
197
+ puts ('a: ' + a.inspect).debug if @debug
198
+ a.uniq.map {|y| @indexer.uri_index[y].split(/\s+(?=https?[^\s]+$)/,2) }
199
+
200
+ end
201
+
202
+ t2 = Time.now - t
203
+ puts ("found %s results" % results.length).info
204
+ puts ("search took " + ("%.3f" % t2).brown + " seconds").info
205
+ puts
128
206
 
129
207
  return results
130
208
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indexer101
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,28 +35,28 @@ cert_chain:
35
35
  08cN0E9zjqKINgH/PsZTot+ohuVRLwn6WmHHhb18oUrxt3a0u4/3TNcWOcMeR0F2
36
36
  GeYL+mKGct5bfjn8IZnAJVKY
37
37
  -----END CERTIFICATE-----
38
- date: 2019-11-11 00:00:00.000000000 Z
38
+ date: 2019-11-12 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: c32
41
+ name: dynarex
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: 0.2.0
47
44
  - - "~>"
48
45
  - !ruby/object:Gem::Version
49
- version: '0.2'
46
+ version: '1.8'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 1.8.21
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- version: 0.2.0
57
54
  - - "~>"
58
55
  - !ruby/object:Gem::Version
59
- version: '0.2'
56
+ version: '1.8'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 1.8.21
60
60
  description:
61
61
  email: james@jamesrobertson.eu
62
62
  executables: []
metadata.gz.sig CHANGED
Binary file