gitlab-elasticsearch-git 0.0.6

--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,10 @@
+ require "bundler/gem_tasks"
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new do |t|
+   t.libs << "test"
+   t.test_files = FileList['test/lib/*_test.rb']
+ end
+
+ task :default => :test
--- /dev/null
+++ b/gitlab-elasticsearch-git.gemspec
@@ -0,0 +1,28 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'elasticsearch/git/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "gitlab-elasticsearch-git"
+   spec.version       = Elasticsearch::Git::VERSION
+   spec.authors       = ["Andrey Kumanyaev", "Evgeniy Sokovikov", "GitLab B.V."]
+   spec.email         = ["me@zzet.org", "skv-headless@yandex.ru"]
+   spec.summary       = %q{Elasticsearch integrations for git repositories.}
+   spec.description   = %q{Elasticsearch integrations for indexing git repositories.}
+   spec.homepage      = "https://gitlab.com/gitlab-org/gitlab-elasticsearch-git"
+   spec.license       = "MIT"
+
+   spec.files         = `git ls-files -z`.split("\x0")
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_runtime_dependency 'elasticsearch-model'
+   spec.add_runtime_dependency 'elasticsearch-api', '~> 0.4.0'
+   spec.add_runtime_dependency 'rugged', '~> 0.23.3'
+   spec.add_runtime_dependency 'charlock_holmes', '~> 0.7.3'
+   spec.add_runtime_dependency 'github-linguist', '~> 4.7.0'
+   spec.add_runtime_dependency 'activemodel', '~> 4.2.0'
+   spec.add_runtime_dependency 'activesupport', '~> 4.2.0'
+ end
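
For reference, pulling the gem into a project is just a Gemfile entry (a minimal sketch; the gem name and version come from this gemspec, and the source line is the standard default):

    # Gemfile -- pulls in the gem plus the runtime dependencies
    # declared in the gemspec (rugged, charlock_holmes, etc.)
    source 'https://rubygems.org'

    gem 'gitlab-elasticsearch-git', '~> 0.0.6'
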
--- /dev/null
+++ b/lib/elasticsearch/git.rb
@@ -0,0 +1,9 @@
+ require "elasticsearch/git/version"
+ require "elasticsearch/git/model"
+ require "elasticsearch/git/repository"
+
+ module Elasticsearch
+   module Git
+   end
+ end
+
--- /dev/null
+++ b/lib/elasticsearch/git/encoder_helper.rb
@@ -0,0 +1,43 @@
+ require 'active_support/concern'
+ require 'charlock_holmes'
+
+ module Elasticsearch
+   module Git
+     module EncoderHelper
+       extend ActiveSupport::Concern
+
+       included do
+         def encode!(message)
+           return nil unless message.respond_to? :force_encoding
+
+           # If the message is already valid UTF-8, return it as-is
+           message.force_encoding("UTF-8")
+           return message if message.valid_encoding?
+
+           # Return the message as binary if its content type is binary
+           detect = CharlockHolmes::EncodingDetector.detect(message)
+           return message.force_encoding("BINARY") if detect && detect[:type] == :binary
+
+           # Force the detected encoding so the message can be transcoded
+           if detect && detect[:encoding]
+             message.force_encoding(detect[:encoding])
+           end
+
+           # Transcode to UTF-8 and strip invalid characters
+           message.replace clean(message)
+         rescue
+           encoding = detect ? detect[:encoding] : "unknown"
+           "--broken encoding: #{encoding}"
+         end
+
+         private
+
+         def clean(message)
+           message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
+             .encode("UTF-8")
+             .gsub("\0".encode("UTF-8"), "")
+         end
+       end
+     end
+   end
+ end
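
A rough illustration of the encoder above (a sketch: the `Indexer` host class is hypothetical, and the output assumes CharlockHolmes detects the ISO-8859-1 input correctly):

    require 'elasticsearch/git/encoder_helper'

    # Hypothetical host class; EncoderHelper defines encode! on whatever includes it.
    class Indexer
      include Elasticsearch::Git::EncoderHelper
    end

    # "h\xE9llo" is ISO-8859-1 and not valid UTF-8 ...
    latin1 = "h\xE9llo".force_encoding("ISO-8859-1")

    # ... so encode! falls through to detection and transcodes to UTF-8.
    puts Indexer.new.encode!(latin1)   # => "héllo"
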
--- /dev/null
+++ b/lib/elasticsearch/git/lite_blob.rb
@@ -0,0 +1,25 @@
+ require 'linguist'
+ require 'elasticsearch/git/encoder_helper'
+
+ module Elasticsearch
+   module Git
+     class LiteBlob
+       include Linguist::BlobHelper
+       include Elasticsearch::Git::EncoderHelper
+
+       attr_accessor :id, :name, :path, :data, :size, :mode, :commit_id
+
+       def initialize(repo, raw_blob_hash)
+         @id = raw_blob_hash[:oid]
+
+         blob = repo.lookup(@id)
+
+         @mode = raw_blob_hash[:mode].to_s(8)
+         @size = blob.size
+         @path = encode!(raw_blob_hash[:path])
+         @name = @path.split('/').last
+         @data = encode!(blob.content)
+       end
+     end
+   end
+ end
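
A sketch of how LiteBlob gets fed from Rugged (the repository path is hypothetical; note that Rugged tree entries expose :filemode while LiteBlob reads :mode, so the entry hash is adjusted before wrapping):

    require 'rugged'
    require 'elasticsearch/git/lite_blob'

    repo = Rugged::Repository.new('/path/to/repo.git') # hypothetical path

    # Wrap each top-level blob of HEAD the way the indexer does.
    repo.head.target.tree.each_blob do |entry|
      entry[:path] = entry[:name]     # no directory prefix at the top level
      entry[:mode] = entry[:filemode] # LiteBlob expects :mode
      blob = Elasticsearch::Git::LiteBlob.new(repo, entry)
      puts "#{blob.path} (#{blob.size} bytes) text=#{blob.text?}"
    end
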
--- /dev/null
+++ b/lib/elasticsearch/git/model.rb
@@ -0,0 +1,92 @@
+ require 'active_support/concern'
+ require 'active_model'
+ require 'elasticsearch/model'
+
+ module Elasticsearch
+   module Git
+     module Model
+       extend ActiveSupport::Concern
+
+       included do
+         extend ActiveModel::Naming
+         include ActiveModel::Model
+         include Elasticsearch::Model
+
+         env = if defined?(::Rails)
+                 ::Rails.env.to_s
+               else
+                 "undefined"
+               end
+
+         index_name [self.name.downcase, 'index', env].join('-')
+
+         settings \
+           index: {
+             analysis: {
+               analyzer: {
+                 human_analyzer: {
+                   type: 'custom',
+                   tokenizer: 'human_tokenizer',
+                   filter: %w(lowercase asciifolding human_ngrams)
+                 },
+                 path_analyzer: {
+                   type: 'custom',
+                   tokenizer: 'path_tokenizer',
+                   filter: %w(lowercase asciifolding path_ngrams)
+                 },
+                 sha_analyzer: {
+                   type: 'custom',
+                   tokenizer: 'sha_tokenizer',
+                   filter: %w(lowercase asciifolding sha_ngrams)
+                 },
+                 code_analyzer: {
+                   type: 'custom',
+                   tokenizer: 'standard',
+                   filter: %w(lowercase asciifolding code_stemmer)
+                 }
+               },
+               tokenizer: {
+                 sha_tokenizer: {
+                   type: "edgeNGram",
+                   min_gram: 8,
+                   max_gram: 40,
+                   token_chars: %w(letter digit)
+                 },
+                 human_tokenizer: {
+                   type: "nGram",
+                   min_gram: 1,
+                   max_gram: 20,
+                   token_chars: %w(letter digit)
+                 },
+                 path_tokenizer: {
+                   type: 'path_hierarchy',
+                   reverse: true
+                 }
+               },
+               filter: {
+                 human_ngrams: {
+                   type: "nGram",
+                   min_gram: 1,
+                   max_gram: 20
+                 },
+                 sha_ngrams: {
+                   type: "edgeNGram",
+                   min_gram: 8,
+                   max_gram: 40
+                 },
+                 path_ngrams: {
+                   type: "edgeNGram",
+                   min_gram: 3,
+                   max_gram: 15
+                 },
+                 code_stemmer: {
+                   type: "stemmer",
+                   name: "minimal_english"
+                 }
+               }
+             }
+           }
+       end
+     end
+   end
+ end
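
A minimal sketch of the concern in use (the `Snippet` class name is illustrative; outside Rails the env fallback above makes the index name end in -undefined):

    require 'elasticsearch/git/model'

    class Snippet # hypothetical including class
      include Elasticsearch::Git::Model
    end

    # index_name is "<class>-index-<env>", per the included block above:
    puts Snippet.index_name # => "snippet-index-undefined" outside Rails
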
--- /dev/null
+++ b/lib/elasticsearch/git/repository.rb
@@ -0,0 +1,570 @@
+ require 'active_support/concern'
+ require 'active_model'
+ require 'elasticsearch'
+ require 'elasticsearch/git/model'
+ require 'elasticsearch/git/encoder_helper'
+ require 'elasticsearch/git/lite_blob'
+ require 'rugged'
+ require 'open3' # used by index_commits
+
+ module Elasticsearch
+   module Git
+     module Repository
+       class CreateIndexException < StandardError; end
+
+       extend ActiveSupport::Concern
+
+       included do
+         include Elasticsearch::Git::Model
+         include Elasticsearch::Git::EncoderHelper
+
+         mapping _timestamp: { enabled: true } do
+           indexes :blob do
+             indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, analyzer: :human_analyzer
+             indexes :rid, type: :string, index: :not_analyzed
+             indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+             indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+             indexes :path, type: :string, search_analyzer: :path_analyzer, analyzer: :path_analyzer
+             indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+             indexes :language, type: :string, index: :not_analyzed
+           end
+
+           indexes :commit do
+             indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, analyzer: :human_analyzer
+             indexes :rid, type: :string, index: :not_analyzed
+             indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+
+             indexes :author do
+               indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+               indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+               indexes :time, type: :date
+             end
+
+             indexes :committer do
+               indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+               indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+               indexes :time, type: :date
+             end
+
+             indexes :message, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+           end
+         end
+
+         # Index all text-like blobs in the repository
+         #
+         # All data is stored in one global index;
+         # a repository can be selected via the 'rid' field,
+         # which can also be used to store a 'project' id.
+         #
+         # blob {
+         #   id - unique id of the blob across all repositories
+         #   oid - blob id within the repository
+         #   content - blob content
+         #   commit_sha - sha of the commit the blob was indexed at
+         # }
+         #
+         # To search blobs, use type 'blob'
+         def index_blobs(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
+           from, to = parse_revs(from_rev, to_rev)
+
+           diff = repository_for_indexing.diff(from, to)
+
+           diff.deltas.reverse.each_with_index do |delta, step|
+             if delta.status == :deleted
+               next if delta.old_file[:mode].to_s(8) == "160000" # skip submodules
+               b = LiteBlob.new(repository_for_indexing, delta.old_file)
+               delete_from_index_blob(b)
+             else
+               next if delta.new_file[:mode].to_s(8) == "160000" # skip submodules
+               b = LiteBlob.new(repository_for_indexing, delta.new_file)
+               index_blob(b, to)
+             end
+
+             # Run GC every 100 blobs to keep memory usage in check
+             ObjectSpace.garbage_collect if step % 100 == 0
+           end
+         end
+
+         def index_blob(blob, target_sha)
+           if can_index_blob?(blob)
+             tries = 0
+
+             begin
+               client_for_indexing.index \
+                 index: "#{self.class.index_name}",
+                 type: self.class.name.underscore,
+                 id: "#{repository_id}_#{blob.path}",
+                 body: {
+                   blob: {
+                     type: "blob",
+                     oid: blob.id,
+                     rid: repository_id,
+                     content: blob.data,
+                     commit_sha: target_sha,
+                     path: blob.path,
+                     language: blob.language ? blob.language.name : "Text"
+                   }
+                 }
+             rescue Exception => ex
+               if tries < 2
+                 tries += 1
+                 sleep 1
+                 retry
+               else
+                 raise CreateIndexException, "Can't index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
+               end
+             end
+           end
+         end
+
+         # Index only text-like files smaller than 1 MB
+         def can_index_blob?(blob)
+           blob.text? && (blob.size && blob.size.to_i < 1048576)
+         end
+
+         def delete_from_index_blob(blob)
+           if blob.text?
+             begin
+               client_for_indexing.delete \
+                 index: "#{self.class.index_name}",
+                 type: "repository",
+                 id: "#{repository_id}_#{blob.path}"
+             rescue Elasticsearch::Transport::Transport::Errors::NotFound
+               return true
+             rescue Exception => ex
+               raise CreateIndexException, "Error removing file from index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
+             end
+           end
+         end
+
+         # Index all commits in the repository
+         #
+         # All data is stored in one global index;
+         # a repository can be filtered via the 'rid' field,
+         # which can also be used to store a 'project' id.
+         #
+         # commit {
+         #   sha - commit sha
+         #   author {
+         #     name - commit author name
+         #     email - commit author email
+         #     time - commit time
+         #   }
+         #   committer {
+         #     name - committer name
+         #     email - committer email
+         #     time - commit time
+         #   }
+         #   message - commit message
+         # }
+         #
+         # To search commits, use type 'commit'
+         def index_commits(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
+           from, to = parse_revs(from_rev, to_rev)
+           range = [from, to].reject(&:nil?).join('..')
+           out, err, status = Open3.capture3("git log #{range} --format=\"%H\"", chdir: repository_for_indexing.path)
+
+           if status.success? && err.blank?
+             # TODO: use a Rugged walker instead of shelling out to git log
+             commit_oids = out.split("\n")
+
+             commit_oids.each_with_index do |commit, step|
+               index_commit(repository_for_indexing.lookup(commit))
+               ObjectSpace.garbage_collect if step % 100 == 0
+             end
+             return commit_oids.count
+           end
+
+           0
+         end
+
+         def index_commit(commit)
+           tries = 0
+
+           begin
+             client_for_indexing.index \
+               index: "#{self.class.index_name}",
+               type: self.class.name.underscore,
+               id: "#{repository_id}_#{commit.oid}",
+               body: {
+                 commit: {
+                   type: "commit",
+                   rid: repository_id,
+                   sha: commit.oid,
+                   author: commit.author,
+                   committer: commit.committer,
+                   message: encode!(commit.message)
+                 }
+               }
+           rescue Exception => ex
+             if tries < 2
+               tries += 1
+               sleep 1
+               retry
+             else
+               raise CreateIndexException, "Can't index #{repository_id}_#{commit.oid}. Reason: #{ex.message}"
+             end
+           end
+         end
+
+         def parse_revs(from_rev, to_rev)
+           from = if index_new_branch?(from_rev)
+                    if to_rev == repository_for_indexing.last_commit.oid
+                      nil
+                    else
+                      merge_base(to_rev)
+                    end
+                  else
+                    from_rev
+                  end
+
+           return from, to_rev
+         end
+
+         def index_new_branch?(from)
+           from == '0000000000000000000000000000000000000000'
+         end
+
+         # Representation of the repository as indexed JSON.
+         # Attention: the resulting hash can be very large.
+         def as_indexed_json(options = {})
+           data = {}
+           data[:blobs] = index_blobs_array
+           data[:commits] = index_commits_array
+           data
+         end
+
+         # Build the array of blob hashes for indexing
+         def index_blobs_array
+           result = []
+
+           target_sha = repository_for_indexing.head.target.oid
+
+           if repository_for_indexing.bare?
+             tree = repository_for_indexing.lookup(target_sha).tree
+             result.push(recurse_blobs_index_hash(tree))
+           else
+             repository_for_indexing.index.each do |blob|
+               b = LiteBlob.new(repository_for_indexing, blob)
+               result.push(
+                 {
+                   type: 'blob',
+                   id: "#{target_sha}_#{b.path}",
+                   rid: repository_id,
+                   oid: b.id,
+                   content: b.data,
+                   commit_sha: target_sha
+                 }
+               ) if b.text?
+             end
+           end
+
+           result
+         end
+
+         def recurse_blobs_index_hash(tree, path = "")
+           result = []
+
+           tree.each_blob do |blob|
+             blob[:path] = path + blob[:name]
+             b = LiteBlob.new(repository_for_indexing, blob)
+             result.push(
+               {
+                 type: 'blob',
+                 id: "#{repository_for_indexing.head.target.oid}_#{path}#{blob[:name]}",
+                 rid: repository_id,
+                 oid: b.id,
+                 content: b.data,
+                 commit_sha: repository_for_indexing.head.target.oid
+               }
+             ) if b.text?
+           end
+
+           tree.each_tree do |nested_tree|
+             result.push(recurse_blobs_index_hash(repository_for_indexing.lookup(nested_tree[:oid]), "#{nested_tree[:name]}/"))
+           end
+
+           result.flatten
+         end
+
+         # Look up all object ids for commit objects
+         def index_commits_array
+           res = []
+
+           repository_for_indexing.each_id do |oid|
+             obj = repository_for_indexing.lookup(oid)
+             if obj.type == :commit
+               res.push(
+                 {
+                   type: 'commit',
+                   sha: obj.oid,
+                   author: obj.author,
+                   committer: obj.committer,
+                   message: encode!(obj.message)
+                 }
+               )
+             end
+           end
+
+           res
+         end
+
+ def search(query, type: :all, page: 1, per: 20, options: {})
312
+ options[:repository_id] = repository_id if options[:repository_id].nil?
313
+ self.class.search(query, type: type, page: page, per: per, options: options)
314
+ end
315
+
316
+ # Repository id used for identity data from different repositories
317
+ # Update this value if need
318
+ def set_repository_id id = nil
319
+ @repository_id = id || path_to_repo
320
+ end
321
+
322
+ # For Overwrite
323
+ def repository_id
324
+ @repository_id
325
+ end
326
+
327
+ # For Overwrite
328
+ def self.repositories_count
329
+ 10
330
+ end
331
+
332
+ unless defined?(path_to_repo)
333
+ def path_to_repo
334
+ if @path_to_repo.blank?
335
+ raise NotImplementedError, 'Please, define "path_to_repo" method, or set "path_to_repo" via "repository_for_indexing" method'
336
+ else
337
+ @path_to_repo
338
+ end
339
+ end
340
+ end
341
+
342
+ def repository_for_indexing(repo_path = nil)
343
+ return @rugged_repo_indexer if defined? @rugged_repo_indexer
344
+
345
+ @path_to_repo ||= repo_path || path_to_repo
346
+
347
+ set_repository_id
348
+
349
+ @rugged_repo_indexer = Rugged::Repository.new(@path_to_repo)
350
+ end
351
+
352
+ def client_for_indexing
353
+ @client_for_indexing ||= Elasticsearch::Client.new log: true
354
+ end
355
+
356
+ def self.search(query, type: :all, page: 1, per: 20, options: {})
357
+ results = { blobs: [], commits: []}
358
+
359
+ case type.to_sym
360
+ when :all
361
+ results[:blobs] = search_blob(query, page: page, per: per, options: options)
362
+ results[:commits] = search_commit(query, page: page, per: per, options: options)
363
+ when :blob
364
+ results[:blobs] = search_blob(query, page: page, per: per, options: options)
365
+ when :commit
366
+ results[:commits] = search_commit(query, page: page, per: per, options: options)
367
+ end
368
+
369
+ results
370
+ end
371
+
372
+ private
373
+
374
+ def merge_base(to_rev)
375
+ head_sha = repository_for_indexing.last_commit.oid
376
+ repository_for_indexing.merge_base(to_rev, head_sha)
377
+ end
378
+ end
379
+
+       module ClassMethods
+         def search_commit(query, page: 1, per: 20, options: {})
+           page ||= 1
+
+           fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map { |i| "commit.#{i}" }
+
+           query_hash = {
+             query: {
+               filtered: {
+                 query: {
+                   multi_match: {
+                     fields: fields,
+                     query: "#{query}",
+                     operator: :or
+                   }
+                 }
+               }
+             },
+             aggs: {
+               commitRepositoryFaset: {
+                 terms: {
+                   field: "commit.rid",
+                   all_terms: true,
+                   size: repositories_count
+                 }
+               }
+             },
+             size: per,
+             from: per * (page - 1)
+           }
+
+           if query.blank?
+             query_hash[:query][:filtered][:query] = { match_all: {} }
+             query_hash[:track_scores] = true
+           end
+
+           if options[:repository_id]
+             query_hash[:query][:filtered][:filter] ||= { and: [] }
+             query_hash[:query][:filtered][:filter][:and] << {
+               terms: {
+                 "commit.rid" => [options[:repository_id]].flatten
+               }
+             }
+           end
+
+           if options[:highlight]
+             es_fields = fields.map { |field| field.split('^').first }.inject({}) do |memo, field|
+               memo[field.to_sym] = {}
+               memo
+             end
+
+             query_hash[:highlight] = {
+               pre_tags: ["gitlabelasticsearch→"],
+               post_tags: ["←gitlabelasticsearch"],
+               fields: es_fields
+             }
+           end
+
+           options[:order] = :default if options[:order].blank?
+
+           order = case options[:order].to_sym
+                   when :recently_indexed
+                     { _timestamp: { order: :desc, mode: :min } }
+                   when :last_indexed
+                     { _timestamp: { order: :asc, mode: :min } }
+                   else
+                     {}
+                   end
+
+           query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
+
+           res = self.__elasticsearch__.search(query_hash)
+           {
+             results: res.results,
+             total_count: res.size,
+             repositories: res.response["aggregations"]["commitRepositoryFaset"]["buckets"]
+           }
+         end
+
+         def search_blob(query, type: :all, page: 1, per: 20, options: {})
+           page ||= 1
+
+           query_hash = {
+             query: {
+               filtered: {
+                 query: {
+                   match: {
+                     'blob.content' => {
+                       query: "#{query}",
+                       operator: :and
+                     }
+                   }
+                 }
+               }
+             },
+             aggs: {
+               languageFacet: {
+                 terms: {
+                   field: :language,
+                   all_terms: true,
+                   size: 20
+                 }
+               },
+               blobRepositoryFaset: {
+                 terms: {
+                   field: :rid,
+                   all_terms: true,
+                   size: repositories_count
+                 }
+               }
+             },
+             size: per,
+             from: per * (page - 1)
+           }
+
+           if options[:repository_id]
+             query_hash[:query][:filtered][:filter] ||= { and: [] }
+             query_hash[:query][:filtered][:filter][:and] << {
+               terms: {
+                 "blob.rid" => [options[:repository_id]].flatten
+               }
+             }
+           end
+
+           if options[:language]
+             query_hash[:query][:filtered][:filter] ||= { and: [] }
+             query_hash[:query][:filtered][:filter][:and] << {
+               terms: {
+                 "blob.language" => [options[:language]].flatten
+               }
+             }
+           end
+
+           options[:order] = :default if options[:order].blank?
+
+           order = case options[:order].to_sym
+                   when :recently_indexed
+                     { _timestamp: { order: :desc, mode: :min } }
+                   when :last_indexed
+                     { _timestamp: { order: :asc, mode: :min } }
+                   else
+                     {}
+                   end
+
+           query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
+
+           if options[:highlight]
+             query_hash[:highlight] = {
+               pre_tags: ["gitlabelasticsearch→"],
+               post_tags: ["←gitlabelasticsearch"],
+               fields: {
+                 "blob.content" => {
+                   "type" => "fvh",
+                   "boundary_chars" => "\n"
+                 }
+               }
+             }
+           end
+
+           res = self.__elasticsearch__.search(query_hash)
+
+           {
+             results: res.results,
+             total_count: res.size,
+             languages: res.response["aggregations"]["languageFacet"]["buckets"],
+             repositories: res.response["aggregations"]["blobRepositoryFaset"]["buckets"]
+           }
+         end
+
+         def search_file_names(query, page: 1, per: 20, options: {})
+           query_hash = {
+             fields: ['blob.path'],
+             query: {
+               fuzzy: {
+                 "repository.blob.path" => { value: query }
+               }
+             },
+             filter: {
+               term: {
+                 "repository.blob.rid" => [options[:repository_id]].flatten
+               }
+             },
+             size: per,
+             from: per * (page - 1)
+           }
+
+           self.__elasticsearch__.search(query_hash)
+         end
+       end
+     end
+   end
+ end
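
Putting the pieces together, indexing and searching might look roughly like this (a sketch under assumptions: Elasticsearch running on the default localhost:9200, a hypothetical repository path, and the index already created, e.g. via Elasticsearch::Model's create_index!):

    require 'elasticsearch/git'

    class Repository # hypothetical host class
      include Elasticsearch::Git::Repository

      def path_to_repo
        '/path/to/repo.git' # hypothetical
      end
    end

    repo = Repository.new
    repo.repository_for_indexing # opens the Rugged repo and sets repository_id

    # Index every commit and all text-like blobs up to HEAD ...
    repo.index_commits
    repo.index_blobs

    # ... then query both document types at once.
    res = repo.search('initialize', type: :all)
    puts res[:blobs][:total_count]
    puts res[:commits][:total_count]
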