indexer101 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c12f1a8d9fc5dcde5c95bd80e0116236f4d70b9b0f835899b8339d765f5771b8
4
- data.tar.gz: d3aa8b7f9146bbe35a28496bc01654ffe6d638289771fde4fb62dd6aec371682
3
+ metadata.gz: '059a4e8a4809e20f53e1d35100dad97936de2da80e2477650fb7efd0841c1cc2'
4
+ data.tar.gz: 215873766f61840776a8d3540556a034536dd71621593b928af8cb100861f973
5
5
  SHA512:
6
- metadata.gz: ad823a882a9052f38de0acb790ee88b5dae6bd5bd19abbea22aa858a665f7d795d8cc373e4b4cc6ff0c34c0d552822484258c2203df0c25cb362f9d54ba56c78
7
- data.tar.gz: d20146a83a706cac482fdfde143b99f4649e1b4f77e9bae9bd000120c4a82c533600777d762b75e3cefd9f36e011aba07cc13f0160b5fb599438cbca23296ce7
6
+ metadata.gz: 54afb23966c1b323821dca57198fb0b45a88e5f0de514fd48361ec1fbb59c1903152d2854c30fb73b03b78f96b582d174ae6211cd1174105c465bff92665be55
7
+ data.tar.gz: 864f8fa54173d1e769e70fbc688bd48c073aefb9aaf4b076d3e327f0c5c4c686303c1b1ab2a64295868aea8eced05bae276949edda6f2a2edb57427b2185ed58
checksums.yaml.gz.sig CHANGED
Binary file
data.tar.gz.sig CHANGED
Binary file
data/lib/indexer101.rb CHANGED
@@ -118,7 +118,9 @@ class Indexer101
118
118
 
119
119
  end
120
120
 
121
- def scan_dxindex(*locations)
121
+ # scan levels: 0 = tags only; 1 = all words in title (including tags)
122
+ #
123
+ def scan_dxindex(*locations, level: 0)
122
124
 
123
125
  t = Time.now
124
126
  threads = locations.flatten.map do |location|
@@ -132,18 +134,41 @@ class Indexer101
132
134
  puts ("dxindex documents loaded in " + ("%.2f" % t2).brown \
133
135
  + " seconds").info
134
136
 
135
- a.each.with_index do |dx, i|
137
+
138
+ id = 1
139
+
140
+ a.each do |dx|
141
+
142
+ id2 = id
136
143
 
137
144
  @indexer.uri_index.merge! Hash[dx.all.reverse.map.with_index \
138
- {|x,j| [(i+1)*10000 + (j+1), [x.title, x.url].join(' ')]}]
145
+ {|x,i| [id+i, [Time.parse(x.created), x.title, x.url]]}]
139
146
 
140
- dx.all.reverse.each.with_index do |x,j|
141
- x.title.scan(/#(\w+)/).flatten(1).each do |keyword|
142
- @indexer.index[keyword.to_sym] ||= []
143
- @indexer.index[keyword.to_sym] << (i+1)*10000 + (j+1)
147
+ dx.all.reverse.each do |x|
148
+
149
+ case level
150
+ when 0
151
+
152
+ x.title.scan(/(\#\w+)/).flatten(1).each do |keyword|
153
+ @indexer.index[keyword.downcase.to_sym] ||= []
154
+ @indexer.index[keyword.downcase.to_sym] << id2
155
+ end
156
+
157
+ when 1
158
+
159
+ x.title.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/).each do |keyword|
160
+ @indexer.index[keyword.downcase.to_sym] ||= []
161
+ @indexer.index[keyword.downcase.to_sym] << id2
162
+ end
163
+
144
164
  end
165
+
166
+ id2 += 1
167
+
145
168
  end
146
169
 
170
+ id = id2
171
+
147
172
  end
148
173
 
149
174
  end
@@ -182,23 +207,32 @@ class Indexer101
182
207
 
183
208
  t = Time.now
184
209
 
185
- results = keywords.flatten(1).flat_map do |x|
210
+ r = keywords.flatten(1).map do |x|
186
211
 
187
212
  a = []
188
213
  a += @indexer.index[x.to_sym].reverse if @indexer.index.has_key? x.to_sym
189
214
 
190
215
  if x.length > 3 then
191
- a += @indexer.index.keys.reverse.grep(/^#{x}/).flat_map\
192
- {|y| @indexer.index[y]}
193
- a += @indexer.index.keys.reverse.grep(/#{x}/).flat_map\
194
- {|y| @indexer.index[y]}
216
+ a += @indexer.index.keys.grep(/^#{x}/i).flat_map\
217
+ {|y| @indexer.index[y].reverse}
218
+ a += @indexer.index.keys.grep(/#{x}/i).flat_map\
219
+ {|y| @indexer.index[y].reverse}
195
220
  end
196
221
 
197
222
  puts ('a: ' + a.inspect).debug if @debug
198
- a.uniq.map {|y| @indexer.uri_index[y].split(/\s+(?=https?[^\s]+$)/,2) }
199
223
 
224
+ a.uniq.map {|y| @indexer.uri_index[y]}
225
+
226
+ end
227
+
228
+ # group by number of results found, sort by count, then by date
229
+ a3 = r.flatten(1).group_by(&:last).to_a.sort do |x, x2|
230
+ -([x.last.length, x.last.first] <=> [x2.last.length, x2.last.first])
200
231
  end
201
232
 
233
+ # fetch the 1st record from each group item
234
+ results = a3.map {|x| x.last.first}
235
+
202
236
  t2 = Time.now - t
203
237
  puts ("found %s results" % results.length).info
204
238
  puts ("search took " + ("%.3f" % t2).brown + " seconds").info
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indexer101
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
metadata.gz.sig CHANGED
Binary file