indexer101 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c12f1a8d9fc5dcde5c95bd80e0116236f4d70b9b0f835899b8339d765f5771b8
4
- data.tar.gz: d3aa8b7f9146bbe35a28496bc01654ffe6d638289771fde4fb62dd6aec371682
3
+ metadata.gz: '059a4e8a4809e20f53e1d35100dad97936de2da80e2477650fb7efd0841c1cc2'
4
+ data.tar.gz: 215873766f61840776a8d3540556a034536dd71621593b928af8cb100861f973
5
5
  SHA512:
6
- metadata.gz: ad823a882a9052f38de0acb790ee88b5dae6bd5bd19abbea22aa858a665f7d795d8cc373e4b4cc6ff0c34c0d552822484258c2203df0c25cb362f9d54ba56c78
7
- data.tar.gz: d20146a83a706cac482fdfde143b99f4649e1b4f77e9bae9bd000120c4a82c533600777d762b75e3cefd9f36e011aba07cc13f0160b5fb599438cbca23296ce7
6
+ metadata.gz: 54afb23966c1b323821dca57198fb0b45a88e5f0de514fd48361ec1fbb59c1903152d2854c30fb73b03b78f96b582d174ae6211cd1174105c465bff92665be55
7
+ data.tar.gz: 864f8fa54173d1e769e70fbc688bd48c073aefb9aaf4b076d3e327f0c5c4c686303c1b1ab2a64295868aea8eced05bae276949edda6f2a2edb57427b2185ed58
checksums.yaml.gz.sig CHANGED
Binary file
data.tar.gz.sig CHANGED
Binary file
data/lib/indexer101.rb CHANGED
@@ -118,7 +118,9 @@ class Indexer101
118
118
 
119
119
  end
120
120
 
121
- def scan_dxindex(*locations)
121
+ # scan levels: 0 = tags only; 1 = all words in title (including tags)
122
+ #
123
+ def scan_dxindex(*locations, level: 0)
122
124
 
123
125
  t = Time.now
124
126
  threads = locations.flatten.map do |location|
@@ -132,18 +134,41 @@ class Indexer101
132
134
  puts ("dxindex documents loaded in " + ("%.2f" % t2).brown \
133
135
  + " seconds").info
134
136
 
135
- a.each.with_index do |dx, i|
137
+
138
+ id = 1
139
+
140
+ a.each do |dx|
141
+
142
+ id2 = id
136
143
 
137
144
  @indexer.uri_index.merge! Hash[dx.all.reverse.map.with_index \
138
- {|x,j| [(i+1)*10000 + (j+1), [x.title, x.url].join(' ')]}]
145
+ {|x,i| [id+i, [Time.parse(x.created), x.title, x.url]]}]
139
146
 
140
- dx.all.reverse.each.with_index do |x,j|
141
- x.title.scan(/#(\w+)/).flatten(1).each do |keyword|
142
- @indexer.index[keyword.to_sym] ||= []
143
- @indexer.index[keyword.to_sym] << (i+1)*10000 + (j+1)
147
+ dx.all.reverse.each do |x|
148
+
149
+ case level
150
+ when 0
151
+
152
+ x.title.scan(/(\#\w+)/).flatten(1).each do |keyword|
153
+ @indexer.index[keyword.downcase.to_sym] ||= []
154
+ @indexer.index[keyword.downcase.to_sym] << id2
155
+ end
156
+
157
+ when 1
158
+
159
+ x.title.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/).each do |keyword|
160
+ @indexer.index[keyword.downcase.to_sym] ||= []
161
+ @indexer.index[keyword.downcase.to_sym] << id2
162
+ end
163
+
144
164
  end
165
+
166
+ id2 += 1
167
+
145
168
  end
146
169
 
170
+ id = id2
171
+
147
172
  end
148
173
 
149
174
  end
@@ -182,23 +207,32 @@ class Indexer101
182
207
 
183
208
  t = Time.now
184
209
 
185
- results = keywords.flatten(1).flat_map do |x|
210
+ r = keywords.flatten(1).map do |x|
186
211
 
187
212
  a = []
188
213
  a += @indexer.index[x.to_sym].reverse if @indexer.index.has_key? x.to_sym
189
214
 
190
215
  if x.length > 3 then
191
- a += @indexer.index.keys.reverse.grep(/^#{x}/).flat_map\
192
- {|y| @indexer.index[y]}
193
- a += @indexer.index.keys.reverse.grep(/#{x}/).flat_map\
194
- {|y| @indexer.index[y]}
216
+ a += @indexer.index.keys.grep(/^#{x}/i).flat_map\
217
+ {|y| @indexer.index[y].reverse}
218
+ a += @indexer.index.keys.grep(/#{x}/i).flat_map\
219
+ {|y| @indexer.index[y].reverse}
195
220
  end
196
221
 
197
222
  puts ('a: ' + a.inspect).debug if @debug
198
- a.uniq.map {|y| @indexer.uri_index[y].split(/\s+(?=https?[^\s]+$)/,2) }
199
223
 
224
+ a.uniq.map {|y| @indexer.uri_index[y]}
225
+
226
+ end
227
+
228
+ # group the matching records by URL, then sort the groups by match count and then by date (both descending)
229
+ a3 = r.flatten(1).group_by(&:last).to_a.sort do |x, x2|
230
+ -([x.last.length, x.last.first] <=> [x2.last.length, x2.last.first])
200
231
  end
201
232
 
233
+ # fetch the 1st record from each group item
234
+ results = a3.map {|x| x.last.first}
235
+
202
236
  t2 = Time.now - t
203
237
  puts ("found %s results" % results.length).info
204
238
  puts ("search took " + ("%.3f" % t2).brown + " seconds").info
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indexer101
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
metadata.gz.sig CHANGED
Binary file