indexer101 0.2.0 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/indexer101.rb +75 -16
- metadata +48 -27
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 58debc39c10c1dff7e46220800a6f063ee8ae0c2b7e47c5dc1161b1c2555d64d
|
4
|
+
data.tar.gz: e7b468fb8b030e9a6710625f7e3f6061e30b7d0ae0f371d5977c01f027ab3b32
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 480085f3742927accef9c583ef778bedaef2d08c113b87a4fb109fd910ef3b662324b3a35b7cce4c86f3a8d59043f49f03a69e3bfd5d0620a23ba354212e764d
|
7
|
+
data.tar.gz: 62f4cd3287838fb3e25d8c54ba74259b03f40d4bafbcb51198b6ad47c3d67adc0f6dcfcf72eddc771be60356253e05b644152b4b5d3b20bb0662226c4299f12e
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data.tar.gz.sig
CHANGED
Binary file
|
data/lib/indexer101.rb
CHANGED
@@ -6,6 +6,7 @@ require 'c32'
|
|
6
6
|
require 'thread'
|
7
7
|
require 'thwait'
|
8
8
|
require 'dynarex'
|
9
|
+
require 'dxlite'
|
9
10
|
|
10
11
|
|
11
12
|
class Indexer101
|
@@ -118,32 +119,81 @@ class Indexer101
|
|
118
119
|
|
119
120
|
end
|
120
121
|
|
121
|
-
|
122
|
+
# scan levels: 0 = tags only; 1 = all words in title (including tags)
|
123
|
+
#
|
124
|
+
def scan_dxindex(*locations, level: 0)
|
122
125
|
|
123
126
|
t = Time.now
|
124
127
|
threads = locations.flatten.map do |location|
|
125
|
-
|
128
|
+
|
129
|
+
Thread.new {
|
130
|
+
|
131
|
+
if location.is_a?(Dynarex) or location.is_a?(DxLite) then
|
132
|
+
|
133
|
+
Thread.current[:v] = location
|
134
|
+
|
135
|
+
elsif location.is_a? String
|
136
|
+
|
137
|
+
case File.extname(location)
|
138
|
+
when '.xml'
|
139
|
+
Thread.current[:v] = Dynarex.new location, debug: @debug
|
140
|
+
when '.json'
|
141
|
+
Thread.current[:v] = DxLite.new location, debug: @debug
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
145
|
+
}
|
126
146
|
end
|
127
147
|
|
128
148
|
ThreadsWait.all_waits(*threads)
|
129
149
|
|
130
150
|
a = threads.map {|x| x[:v]}
|
151
|
+
puts '_a: ' + a.inspect if @debug
|
131
152
|
t2 = Time.now - t
|
132
153
|
puts ("dxindex documents loaded in " + ("%.2f" % t2).brown \
|
133
154
|
+ " seconds").info
|
134
155
|
|
135
|
-
|
156
|
+
|
157
|
+
id = 1
|
158
|
+
|
159
|
+
a.each do |dx|
|
160
|
+
|
161
|
+
id2 = id
|
162
|
+
|
163
|
+
if @debug then
|
164
|
+
puts 'dx: ' + dx.class.inspect
|
165
|
+
puts 'dx.all: ' + dx.all.inspect
|
166
|
+
end
|
136
167
|
|
137
168
|
@indexer.uri_index.merge! Hash[dx.all.reverse.map.with_index \
|
138
|
-
|
169
|
+
{|x,i| [id+i, [Time.parse(x.created), x.title, x.url]]}]
|
139
170
|
|
140
|
-
dx.all.reverse.each
|
141
|
-
|
142
|
-
|
143
|
-
|
171
|
+
dx.all.reverse.each do |x|
|
172
|
+
|
173
|
+
case level
|
174
|
+
when 0
|
175
|
+
|
176
|
+
x.title.scan(/(\#\w+)/).flatten(1).each do |keyword|
|
177
|
+
@indexer.index[keyword.downcase.to_sym] ||= []
|
178
|
+
@indexer.index[keyword.downcase.to_sym] << id2
|
179
|
+
end
|
180
|
+
|
181
|
+
when 1
|
182
|
+
|
183
|
+
# \u{A3} = £ <- represented as Unicode to avoid ASCII to UTF-8 error
|
184
|
+
x.title.split(/[\s:"!\?\(\)\u{A3}]+(?=[\w#_'-]+)/).each do |keyword|
|
185
|
+
@indexer.index[keyword.downcase.to_sym] ||= []
|
186
|
+
@indexer.index[keyword.downcase.to_sym] << id2
|
187
|
+
end
|
188
|
+
|
144
189
|
end
|
190
|
+
|
191
|
+
id2 += 1
|
192
|
+
|
145
193
|
end
|
146
194
|
|
195
|
+
id = id2
|
196
|
+
|
147
197
|
end
|
148
198
|
|
149
199
|
end
|
@@ -178,27 +228,36 @@ class Indexer101
|
|
178
228
|
|
179
229
|
# enter the exact keywords to search from the index
|
180
230
|
#
|
181
|
-
def search(*keywords)
|
231
|
+
def search(*keywords, minchars: 3)
|
182
232
|
|
183
233
|
t = Time.now
|
184
234
|
|
185
|
-
|
235
|
+
r = keywords.flatten(1).map do |x|
|
186
236
|
|
187
237
|
a = []
|
188
238
|
a += @indexer.index[x.to_sym].reverse if @indexer.index.has_key? x.to_sym
|
189
239
|
|
190
|
-
if x.length
|
191
|
-
a += @indexer.index.keys.
|
192
|
-
{|y| @indexer.index[y]}
|
193
|
-
a += @indexer.index.keys.
|
194
|
-
{|y| @indexer.index[y]}
|
240
|
+
if x.length >= minchars then
|
241
|
+
a += @indexer.index.keys.grep(/^#{x}/i).flat_map\
|
242
|
+
{|y| @indexer.index[y].reverse}
|
243
|
+
a += @indexer.index.keys.grep(/#{x}/i).flat_map\
|
244
|
+
{|y| @indexer.index[y].reverse}
|
195
245
|
end
|
196
246
|
|
197
247
|
puts ('a: ' + a.inspect).debug if @debug
|
198
|
-
a.uniq.map {|y| @indexer.uri_index[y].split(/\s+(?=https?[^\s]+$)/,2) }
|
199
248
|
|
249
|
+
a.uniq.map {|y| @indexer.uri_index[y]}
|
250
|
+
|
251
|
+
end
|
252
|
+
|
253
|
+
# group by number of results found, sort by count, then by date
|
254
|
+
a3 = r.flatten(1).group_by(&:last).to_a.sort do |x, x2|
|
255
|
+
-([x.last.length, x.last.first] <=> [x2.last.length, x2.last.first])
|
200
256
|
end
|
201
257
|
|
258
|
+
# fetch the 1st record from each group item
|
259
|
+
results = a3.map {|x| x.last.first}
|
260
|
+
|
202
261
|
t2 = Time.now - t
|
203
262
|
puts ("found %s results" % results.length).info
|
204
263
|
puts ("search took " + ("%.3f" % t2).brown + " seconds").info
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indexer101
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -11,31 +11,31 @@ cert_chain:
|
|
11
11
|
- |
|
12
12
|
-----BEGIN CERTIFICATE-----
|
13
13
|
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjEwMjIyMDAwNzQ2WhcN
|
15
|
+
MjIwMjIyMDAwNzQ2WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDqAwJO
|
17
|
+
ET6LAnOZB9q04zgLVFB0gJTcuLvfwjf7vpH9gCa5uqshSJnfi0owkeS2Hao0OwD+
|
18
|
+
vJrRRYbPfmXHAhEV8l9bSE6Ul1uTT9A+XS7g724sgOm5tCKFkLu+rcDy25MSjqpD
|
19
|
+
q+cPG4SN3ZUGK5eR9tp//dzdrjCV2wsOaoYKPajVY698p+sRf1zsHsSMxYnJPD/8
|
20
|
+
IkeNC+3VdsJFQ7wAoSk4hSpDuIi1xknA61/elDy5O07r1M25PJMntBE6QpJZblvw
|
21
|
+
v5u8U7+nK9P82KFfUwAjqkrhizt90M+0eK6dG44PnqafnxF84K2v7Qr1W6hKMIeL
|
22
|
+
DcqKPjAop+DO8WPCtfKFcFQKGRSe+H2rej3h34eGPH/GVoh8/h+ZuoDUfQRfoWLb
|
23
|
+
zUTM4uC1XwMlRjg8W4uYgV8SWZ5eii3tpmUz7moKQ4k9DSNNpGO+/bk/IcMVbRkb
|
24
|
+
xG/LzBAa6JQyAtVw9AMq8WHZKtJeNrdthoJSczsurthCHb0nY7VUQ/pp9JsCAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUVn1uBRaL
|
26
|
+
Zh8+3WB077Lz84bokZowJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
27
|
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAUzwCxgrA0YiJvXoi1ZHlhhz+ROzRn6XSQZZVd0Ym
|
29
|
+
gQVkUcvb/iiMnE0PZdivLiorRjhdR5tIPAYhmuN7Mr5IscQFdb1cndyC+qUzy6zP
|
30
|
+
HJGDGqqHDtiYpWlQ3/VAD4V+mAYj67CTj8gM2Y0OfjOIzKLf4jeLzcR0XjxCS9bH
|
31
|
+
g3cF/0FdJ5ydwo6r9QW/mE5yej6yuWBD9NFjWVbV/TAY3rWWDtw9g1WG31HjZVRB
|
32
|
+
lYndPAx0WIUBse5IRDGTiQ1JuMI5vBrxYJCb1Je506nR2rktACDRVSe/DTM4sxZn
|
33
|
+
oP3LBd1hPOAhNya8tD4FmUjQg4tvuWwIKh55XorZVEkzTWGgAJSnu7XTxtPcjxFA
|
34
|
+
U/3nmRr1BTYMN96T+3L81oqJTW5CxAAlsR97O7H8eZhwnNdG9HjgAk4PwiMLOhPb
|
35
|
+
Ely2/UitUG79uLcra+83gWVYzYiqBYC2d5HR4vCpTeecqYFXjWo9E3LMrvyB5Unk
|
36
|
+
zn10wjI+T1ysW7U6t+VJft8s
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date:
|
38
|
+
date: 2021-02-22 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: dynarex
|
@@ -46,7 +46,7 @@ dependencies:
|
|
46
46
|
version: '1.8'
|
47
47
|
- - ">="
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version: 1.8.
|
49
|
+
version: 1.8.25
|
50
50
|
type: :runtime
|
51
51
|
prerelease: false
|
52
52
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -56,7 +56,27 @@ dependencies:
|
|
56
56
|
version: '1.8'
|
57
57
|
- - ">="
|
58
58
|
- !ruby/object:Gem::Version
|
59
|
-
version: 1.8.
|
59
|
+
version: 1.8.25
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: dxlite
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - "~>"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0.2'
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.2.7
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0.2'
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 0.2.7
|
60
80
|
description:
|
61
81
|
email: james@jamesrobertson.eu
|
62
82
|
executables: []
|
@@ -83,7 +103,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
83
103
|
- !ruby/object:Gem::Version
|
84
104
|
version: '0'
|
85
105
|
requirements: []
|
86
|
-
|
106
|
+
rubyforge_project:
|
107
|
+
rubygems_version: 2.7.10
|
87
108
|
signing_key:
|
88
109
|
specification_version: 4
|
89
110
|
summary: Experimental gem to search a list of words 1 character at a time. Intended
|
metadata.gz.sig
CHANGED
Binary file
|