indexer101 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/indexer101.rb +47 -13
- metadata +1 -1
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '059a4e8a4809e20f53e1d35100dad97936de2da80e2477650fb7efd0841c1cc2'
|
4
|
+
data.tar.gz: 215873766f61840776a8d3540556a034536dd71621593b928af8cb100861f973
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54afb23966c1b323821dca57198fb0b45a88e5f0de514fd48361ec1fbb59c1903152d2854c30fb73b03b78f96b582d174ae6211cd1174105c465bff92665be55
|
7
|
+
data.tar.gz: 864f8fa54173d1e769e70fbc688bd48c073aefb9aaf4b076d3e327f0c5c4c686303c1b1ab2a64295868aea8eced05bae276949edda6f2a2edb57427b2185ed58
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data.tar.gz.sig
CHANGED
Binary file
|
data/lib/indexer101.rb
CHANGED
@@ -118,7 +118,9 @@ class Indexer101
|
|
118
118
|
|
119
119
|
end
|
120
120
|
|
121
|
-
|
121
|
+
# scan levels: 0 = tags only; 1 = all words in title (including tags)
|
122
|
+
#
|
123
|
+
def scan_dxindex(*locations, level: 0)
|
122
124
|
|
123
125
|
t = Time.now
|
124
126
|
threads = locations.flatten.map do |location|
|
@@ -132,18 +134,41 @@ class Indexer101
|
|
132
134
|
puts ("dxindex documents loaded in " + ("%.2f" % t2).brown \
|
133
135
|
+ " seconds").info
|
134
136
|
|
135
|
-
|
137
|
+
|
138
|
+
id = 1
|
139
|
+
|
140
|
+
a.each do |dx|
|
141
|
+
|
142
|
+
id2 = id
|
136
143
|
|
137
144
|
@indexer.uri_index.merge! Hash[dx.all.reverse.map.with_index \
|
138
|
-
|
145
|
+
{|x,i| [id+i, [Time.parse(x.created), x.title, x.url]]}]
|
139
146
|
|
140
|
-
dx.all.reverse.each
|
141
|
-
|
142
|
-
|
143
|
-
|
147
|
+
dx.all.reverse.each do |x|
|
148
|
+
|
149
|
+
case level
|
150
|
+
when 0
|
151
|
+
|
152
|
+
x.title.scan(/(\#\w+)/).flatten(1).each do |keyword|
|
153
|
+
@indexer.index[keyword.downcase.to_sym] ||= []
|
154
|
+
@indexer.index[keyword.downcase.to_sym] << id2
|
155
|
+
end
|
156
|
+
|
157
|
+
when 1
|
158
|
+
|
159
|
+
x.title.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/).each do |keyword|
|
160
|
+
@indexer.index[keyword.downcase.to_sym] ||= []
|
161
|
+
@indexer.index[keyword.downcase.to_sym] << id2
|
162
|
+
end
|
163
|
+
|
144
164
|
end
|
165
|
+
|
166
|
+
id2 += 1
|
167
|
+
|
145
168
|
end
|
146
169
|
|
170
|
+
id = id2
|
171
|
+
|
147
172
|
end
|
148
173
|
|
149
174
|
end
|
@@ -182,23 +207,32 @@ class Indexer101
|
|
182
207
|
|
183
208
|
t = Time.now
|
184
209
|
|
185
|
-
|
210
|
+
r = keywords.flatten(1).map do |x|
|
186
211
|
|
187
212
|
a = []
|
188
213
|
a += @indexer.index[x.to_sym].reverse if @indexer.index.has_key? x.to_sym
|
189
214
|
|
190
215
|
if x.length > 3 then
|
191
|
-
a += @indexer.index.keys.
|
192
|
-
{|y| @indexer.index[y]}
|
193
|
-
a += @indexer.index.keys.
|
194
|
-
{|y| @indexer.index[y]}
|
216
|
+
a += @indexer.index.keys.grep(/^#{x}/i).flat_map\
|
217
|
+
{|y| @indexer.index[y].reverse}
|
218
|
+
a += @indexer.index.keys.grep(/#{x}/i).flat_map\
|
219
|
+
{|y| @indexer.index[y].reverse}
|
195
220
|
end
|
196
221
|
|
197
222
|
puts ('a: ' + a.inspect).debug if @debug
|
198
|
-
a.uniq.map {|y| @indexer.uri_index[y].split(/\s+(?=https?[^\s]+$)/,2) }
|
199
223
|
|
224
|
+
a.uniq.map {|y| @indexer.uri_index[y]}
|
225
|
+
|
226
|
+
end
|
227
|
+
|
228
|
+
# group by number of results found, sort by count, then by date
|
229
|
+
a3 = r.flatten(1).group_by(&:last).to_a.sort do |x, x2|
|
230
|
+
-([x.last.length, x.last.first] <=> [x2.last.length, x2.last.first])
|
200
231
|
end
|
201
232
|
|
233
|
+
# fetch the 1st record from each group item
|
234
|
+
results = a3.map {|x| x.last.first}
|
235
|
+
|
202
236
|
t2 = Time.now - t
|
203
237
|
puts ("found %s results" % results.length).info
|
204
238
|
puts ("search took " + ("%.3f" % t2).brown + " seconds").info
|
metadata
CHANGED
metadata.gz.sig
CHANGED
Binary file
|