gitlab-elasticsearch-git 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
+ require "bundler/gem_tasks"
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new do |t|
+   t.libs << "test"
+   t.test_files = FileList['test/lib/*_test.rb']
+ end
+
+ task :default => :test
@@ -0,0 +1,28 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'elasticsearch/git/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "gitlab-elasticsearch-git"
+   spec.version       = Elasticsearch::Git::VERSION
+   spec.authors       = ["Andrey Kumanyaev", "Evgeniy Sokovikov", "GitLab B.V."]
+   spec.email         = ["me@zzet.org", "skv-headless@yandex.ru"]
+   spec.summary       = %q{Elasticsearch integrations for git repositories.}
+   spec.description   = %q{Elasticsearch integrations for indexing git repositories.}
+   spec.homepage      = "https://gitlab.com/gitlab-org/gitlab-elasticsearch-git"
+   spec.license       = "MIT"
+
+   spec.files         = `git ls-files -z`.split("\x0")
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_runtime_dependency 'elasticsearch-model'
+   spec.add_runtime_dependency 'elasticsearch-api', '~> 0.4.0'
+   spec.add_runtime_dependency 'rugged', '~> 0.23.3'
+   spec.add_runtime_dependency 'charlock_holmes', '~> 0.7.3'
+   spec.add_runtime_dependency 'github-linguist', '~> 4.7.0'
+   spec.add_runtime_dependency 'activemodel', '~> 4.2.0'
+   spec.add_runtime_dependency 'activesupport', '~> 4.2.0'
+ end
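
To consume the gem from a Bundler-managed project, a Gemfile entry along these lines should suffice (the version pin is illustrative):

    # Gemfile
    source 'https://rubygems.org'

    gem 'gitlab-elasticsearch-git', '~> 0.0.6'
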
@@ -0,0 +1,9 @@
+ require "elasticsearch/git/version"
+ require "elasticsearch/git/model"
+ require "elasticsearch/git/repository"
+
+ module Elasticsearch
+   module Git
+   end
+ end
+
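
Requiring the top-level entry point loads the version constant and both concerns in one go:

    require 'elasticsearch/git'

    Elasticsearch::Git::VERSION       # => "0.0.6"
    Elasticsearch::Git::Model         # concern providing index settings/analyzers
    Elasticsearch::Git::Repository    # concern providing indexing and search
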
@@ -0,0 +1,43 @@
+ require 'active_support/concern'
+ require 'charlock_holmes'
+
+ module Elasticsearch
+   module Git
+     module EncoderHelper
+       extend ActiveSupport::Concern
+
+       included do
+         def encode!(message)
+           return nil unless message.respond_to? :force_encoding
+
+           # If the message is already valid UTF-8, return it as is
+           message.force_encoding("UTF-8")
+           return message if message.valid_encoding?
+
+           # Return the message tagged as binary if the content is detected as binary
+           detect = CharlockHolmes::EncodingDetector.detect(message)
+           return message.force_encoding("BINARY") if detect && detect[:type] == :binary
+
+           # Re-tag the message with the detected encoding
+           if detect && detect[:encoding]
+             message.force_encoding(detect[:encoding])
+           end
+
+           # Transcode to UTF-8, replacing invalid characters and stripping NUL bytes
+           message.replace clean(message)
+         rescue
+           encoding = detect ? detect[:encoding] : "unknown"
+           "--broken encoding: #{encoding}"
+         end
+
+         private
+
+         def clean(message)
+           message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
+                  .encode("UTF-8")
+                  .gsub("\0".encode("UTF-8"), "")
+         end
+       end
+     end
+   end
+ end
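
A quick illustration of the helper's behaviour (a sketch; the `Sanitizer` class is hypothetical, and the detected encoding depends on CharlockHolmes):

    require 'elasticsearch/git/encoder_helper'

    class Sanitizer
      include Elasticsearch::Git::EncoderHelper
    end

    latin1 = "caf\xE9".b                 # ISO-8859-1 bytes, invalid as UTF-8
    puts Sanitizer.new.encode!(latin1)   # likely => "café", detected and transcoded to UTF-8
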
@@ -0,0 +1,25 @@
+ require 'linguist'
+ require 'elasticsearch/git/encoder_helper'
+
+ module Elasticsearch
+   module Git
+     class LiteBlob
+       include Linguist::BlobHelper
+       include Elasticsearch::Git::EncoderHelper
+
+       attr_accessor :id, :name, :path, :data, :size, :mode, :commit_id
+
+       def initialize(repo, raw_blob_hash)
+         @id = raw_blob_hash[:oid]
+
+         blob = repo.lookup(@id)
+
+         @mode = raw_blob_hash[:mode].to_s(8)
+         @size = blob.size
+         @path = encode!(raw_blob_hash[:path])
+         @name = @path.split('/').last
+         @data = encode!(blob.content)
+       end
+     end
+   end
+ end
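
LiteBlob wraps a Rugged blob lookup together with path and encoding cleanup. A minimal sketch of constructing one by hand; the repository path and file name are hypothetical, and the hash mirrors the :oid/:mode/:path shape the Repository concern passes in:

    require 'rugged'
    require 'elasticsearch/git/lite_blob'

    repo = Rugged::Repository.new('/path/to/repo.git')
    oid  = repo.head.target.tree.first[:oid]   # first top-level entry, assumed to be a blob

    blob = Elasticsearch::Git::LiteBlob.new(repo, { oid: oid, mode: 0o100644, path: 'README.md' })
    puts blob.text?   # Linguist::BlobHelper predicate
    puts blob.data    # UTF-8 cleaned content
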
@@ -0,0 +1,92 @@
+ require 'active_support/concern'
+ require 'active_model'
+ require 'elasticsearch/model'
+
+ module Elasticsearch
+   module Git
+     module Model
+       extend ActiveSupport::Concern
+
+       included do
+         extend ActiveModel::Naming
+         include ActiveModel::Model
+         include Elasticsearch::Model
+
+         env = if defined?(::Rails)
+                 ::Rails.env.to_s
+               else
+                 "undefined"
+               end
+
+         index_name [self.name.downcase, 'index', env].join('-')
+
+         settings \
+           index: {
+             analysis: {
+               analyzer: {
+                 human_analyzer: {
+                   type: 'custom',
+                   tokenizer: 'human_tokenizer',
+                   filter: %w(lowercase asciifolding human_ngrams)
+                 },
+                 path_analyzer: {
+                   type: 'custom',
+                   tokenizer: 'path_tokenizer',
+                   filter: %w(lowercase asciifolding path_ngrams)
+                 },
+                 sha_analyzer: {
+                   type: 'custom',
+                   tokenizer: 'sha_tokenizer',
+                   filter: %w(lowercase asciifolding sha_ngrams)
+                 },
+                 code_analyzer: {
+                   type: 'custom',
+                   tokenizer: 'standard',
+                   filter: %w(lowercase asciifolding code_stemmer)
+                 }
+               },
+               tokenizer: {
+                 sha_tokenizer: {
+                   type: "edgeNGram",
+                   min_gram: 8,
+                   max_gram: 40,
+                   token_chars: %w(letter digit)
+                 },
+                 human_tokenizer: {
+                   type: "nGram",
+                   min_gram: 1,
+                   max_gram: 20,
+                   token_chars: %w(letter digit)
+                 },
+                 path_tokenizer: {
+                   type: 'path_hierarchy',
+                   reverse: true
+                 }
+               },
+               filter: {
+                 human_ngrams: {
+                   type: "nGram",
+                   min_gram: 1,
+                   max_gram: 20
+                 },
+                 sha_ngrams: {
+                   type: "edgeNGram",
+                   min_gram: 8,
+                   max_gram: 40
+                 },
+                 path_ngrams: {
+                   type: "edgeNGram",
+                   min_gram: 3,
+                   max_gram: 15
+                 },
+                 code_stemmer: {
+                   type: "stemmer",
+                   name: "minimal_english"
+                 }
+               }
+             }
+           }
+       end
+     end
+   end
+ end
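
The concern derives the index name from the including class and the Rails environment at include time; a sketch of what that yields (the `Snippet` class is hypothetical, run outside Rails):

    require 'elasticsearch/git'

    class Snippet
      include Elasticsearch::Git::Model
    end

    puts Snippet.index_name   # => "snippet-index-undefined" ("snippet-index-production" under Rails)

The analysis settings above only take effect when the index is created, e.g. via elasticsearch-model's Snippet.__elasticsearch__.create_index!.
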
@@ -0,0 +1,570 @@
+ require 'active_support/concern'
+ require 'active_model'
+ require 'elasticsearch'
+ require 'elasticsearch/git/model'
+ require 'elasticsearch/git/encoder_helper'
+ require 'elasticsearch/git/lite_blob'
+ require 'rugged'
+ require 'open3'
+
+ module Elasticsearch
+   module Git
+     module Repository
+       class CreateIndexException < StandardError; end
+
+       extend ActiveSupport::Concern
+
+       included do
+         include Elasticsearch::Git::Model
+         include Elasticsearch::Git::EncoderHelper
+
+         mapping _timestamp: { enabled: true } do
+           indexes :blob do
+             indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, analyzer: :human_analyzer
+             indexes :rid, type: :string, index: :not_analyzed
+             indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+             indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+             indexes :path, type: :string, search_analyzer: :path_analyzer, analyzer: :path_analyzer
+             indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+             indexes :language, type: :string, index: :not_analyzed
+           end
+
+           indexes :commit do
+             indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, analyzer: :human_analyzer
+             indexes :rid, type: :string, index: :not_analyzed
+             indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+
+             indexes :author do
+               indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+               indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+               indexes :time, type: :date
+             end
+
+             indexes :committer do
+               indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+               indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+               indexes :time, type: :date
+             end
+
+             indexes :message, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, analyzer: :code_analyzer
+           end
+         end
+
+         # Indexes all text-like blobs in the repository
+         #
+         # All data is stored in a global index.
+         # A repository can be selected via the 'rid' field.
+         # If you want, this field can also be used to store a 'project' id.
+         #
+         # blob {
+         #   id - unique id of the blob across all repositories
+         #   oid - blob id within the repository
+         #   content - blob content
+         #   commit_sha - sha of the commit the blob was indexed at
+         # }
+         #
+         # To search blobs, use type 'blob'
+         def index_blobs(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
+           from, to = parse_revs(from_rev, to_rev)
+
+           diff = repository_for_indexing.diff(from, to)
+
+           diff.deltas.reverse.each_with_index do |delta, step|
+             if delta.status == :deleted
+               next if delta.old_file[:mode].to_s(8) == "160000" # skip submodules
+               b = LiteBlob.new(repository_for_indexing, delta.old_file)
+               delete_from_index_blob(b)
+             else
+               next if delta.new_file[:mode].to_s(8) == "160000" # skip submodules
+               b = LiteBlob.new(repository_for_indexing, delta.new_file)
+               index_blob(b, to)
+             end
+
+             # Run GC every 100 blobs to keep memory usage down
+             ObjectSpace.garbage_collect if step % 100 == 0
+           end
+         end
+
+         def index_blob(blob, target_sha)
+           if can_index_blob?(blob)
+             tries = 0
+
+             begin
+               client_for_indexing.index \
+                 index: self.class.index_name,
+                 type: self.class.name.underscore,
+                 id: "#{repository_id}_#{blob.path}",
+                 body: {
+                   blob: {
+                     type: "blob",
+                     oid: blob.id,
+                     rid: repository_id,
+                     content: blob.data,
+                     commit_sha: target_sha,
+                     path: blob.path,
+                     language: blob.language ? blob.language.name : "Text"
+                   }
+                 }
+             rescue StandardError => ex
+               if tries < 2
+                 tries += 1
+                 sleep 1
+                 retry
+               else
+                 raise CreateIndexException, "Can't index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
+               end
+             end
+           end
+         end
+
+         # Only index text-like files smaller than 1 MB
+         def can_index_blob?(blob)
+           blob.text? && (blob.size && blob.size.to_i < 1048576)
+         end
+
+         def delete_from_index_blob(blob)
+           if blob.text?
+             begin
+               client_for_indexing.delete \
+                 index: self.class.index_name,
+                 type: "repository",
+                 id: "#{repository_id}_#{blob.path}"
+             rescue Elasticsearch::Transport::Transport::Errors::NotFound
+               return true
+             rescue StandardError => ex
+               raise CreateIndexException, "Error removing file #{repository_id}_#{blob.path} from the index. Reason: #{ex.message}"
+             end
+           end
+         end
+
+         # Indexes all commits in the repository
+         #
+         # All data is stored in a global index.
+         # Repositories can be filtered by the 'rid' field.
+         # If you want, this field can also be used to store a 'project' id.
+         #
+         # commit {
+         #   sha - commit sha
+         #   author {
+         #     name - commit author name
+         #     email - commit author email
+         #     time - commit time
+         #   }
+         #   committer {
+         #     name - committer name
+         #     email - committer email
+         #     time - commit time
+         #   }
+         #   message - commit message
+         # }
+         #
+         # To search commits, use type 'commit'
+         def index_commits(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
+           from, to = parse_revs(from_rev, to_rev)
+           range = [from, to].reject(&:nil?).join('..')
+           out, err, status = Open3.capture3("git log #{range} --format=\"%H\"", chdir: repository_for_indexing.path)
+
+           if status.success? && err.blank?
+             # TODO: walk commits with a Rugged walker instead of shelling out to git
+             commit_oids = out.split("\n")
+
+             commit_oids.each_with_index do |commit, step|
+               index_commit(repository_for_indexing.lookup(commit))
+               ObjectSpace.garbage_collect if step % 100 == 0
+             end
+             return commit_oids.count
+           end
+
+           0
+         end
+
+         def index_commit(commit)
+           tries = 0
+
+           begin
+             client_for_indexing.index \
+               index: self.class.index_name,
+               type: self.class.name.underscore,
+               id: "#{repository_id}_#{commit.oid}",
+               body: {
+                 commit: {
+                   type: "commit",
+                   rid: repository_id,
+                   sha: commit.oid,
+                   author: commit.author,
+                   committer: commit.committer,
+                   message: encode!(commit.message)
+                 }
+               }
+           rescue StandardError => ex
+             if tries < 2
+               tries += 1
+               sleep 1
+               retry
+             else
+               raise CreateIndexException, "Can't index #{repository_id}_#{commit.oid}. Reason: #{ex.message}"
+             end
+           end
+         end
+
+         def parse_revs(from_rev, to_rev)
+           from = if index_new_branch?(from_rev)
+                    if to_rev == repository_for_indexing.last_commit.oid
+                      nil
+                    else
+                      merge_base(to_rev)
+                    end
+                  else
+                    from_rev
+                  end
+
+           return from, to_rev
+         end
+
+         def index_new_branch?(from)
+           from == '0000000000000000000000000000000000000000' # an all-zero sha means the push created a new branch
+         end
+
+         # Representation of the repository as indexed JSON
+         # Attention: the resulting hash can be extremely large
+         def as_indexed_json(options = {})
+           data = {}
+           data[:blobs] = index_blobs_array
+           data[:commits] = index_commits_array
+           data
+         end
+
+         # Builds the array of blob hashes for the current HEAD
+         def index_blobs_array
+           result = []
+
+           target_sha = repository_for_indexing.head.target.oid
+
+           if repository_for_indexing.bare?
+             tree = repository_for_indexing.lookup(target_sha).tree
+             result.push(recurse_blobs_index_hash(tree))
+           else
+             repository_for_indexing.index.each do |blob|
+               b = LiteBlob.new(repository_for_indexing, blob)
+               result.push(
+                 {
+                   type: 'blob',
+                   id: "#{target_sha}_#{b.path}",
+                   rid: repository_id,
+                   oid: b.id,
+                   content: b.data,
+                   commit_sha: target_sha
+                 }
+               ) if b.text?
+             end
+           end
+
+           result
+         end
+
+         def recurse_blobs_index_hash(tree, path = "")
+           result = []
+
+           tree.each_blob do |blob|
+             blob[:path] = path + blob[:name]
+             b = LiteBlob.new(repository_for_indexing, blob)
+             result.push(
+               {
+                 type: 'blob',
+                 id: "#{repository_for_indexing.head.target.oid}_#{path}#{blob[:name]}",
+                 rid: repository_id,
+                 oid: b.id,
+                 content: b.data,
+                 commit_sha: repository_for_indexing.head.target.oid
+               }
+             ) if b.text?
+           end
+
+           tree.each_tree do |nested_tree|
+             result.push(recurse_blobs_index_hash(repository_for_indexing.lookup(nested_tree[:oid]), "#{nested_tree[:name]}/"))
+           end
+
+           result.flatten
+         end
+
+         # Look up all object ids and keep only the commit objects
+         def index_commits_array
+           res = []
+
+           repository_for_indexing.each_id do |oid|
+             obj = repository_for_indexing.lookup(oid)
+             if obj.type == :commit
+               res.push(
+                 {
+                   type: 'commit',
+                   sha: obj.oid,
+                   author: obj.author,
+                   committer: obj.committer,
+                   message: encode!(obj.message)
+                 }
+               )
+             end
+           end
+
+           res
+         end
+
+         def search(query, type: :all, page: 1, per: 20, options: {})
+           options[:repository_id] = repository_id if options[:repository_id].nil?
+           self.class.search(query, type: type, page: page, per: per, options: options)
+         end
+
+         # The repository id is used to distinguish data from different repositories.
+         # Update this value if needed.
+         def set_repository_id(id = nil)
+           @repository_id = id || path_to_repo
+         end
+
+         # Intended to be overridden
+         def repository_id
+           @repository_id
+         end
+
+         # Intended to be overridden
+         def self.repositories_count
+           10
+         end
+
+         unless defined?(path_to_repo)
+           def path_to_repo
+             if @path_to_repo.blank?
+               raise NotImplementedError, 'Please define a "path_to_repo" method, or set "path_to_repo" via the "repository_for_indexing" method'
+             else
+               @path_to_repo
+             end
+           end
+         end
+
+         def repository_for_indexing(repo_path = nil)
+           return @rugged_repo_indexer if defined? @rugged_repo_indexer
+
+           @path_to_repo ||= repo_path || path_to_repo
+
+           set_repository_id
+
+           @rugged_repo_indexer = Rugged::Repository.new(@path_to_repo)
+         end
+
+         def client_for_indexing
+           @client_for_indexing ||= Elasticsearch::Client.new log: true
+         end
+
+         def self.search(query, type: :all, page: 1, per: 20, options: {})
+           results = { blobs: [], commits: [] }
+
+           case type.to_sym
+           when :all
+             results[:blobs] = search_blob(query, page: page, per: per, options: options)
+             results[:commits] = search_commit(query, page: page, per: per, options: options)
+           when :blob
+             results[:blobs] = search_blob(query, page: page, per: per, options: options)
+           when :commit
+             results[:commits] = search_commit(query, page: page, per: per, options: options)
+           end
+
+           results
+         end
+
+         private
+
+         def merge_base(to_rev)
+           head_sha = repository_for_indexing.last_commit.oid
+           repository_for_indexing.merge_base(to_rev, head_sha)
+         end
+       end
+
+       module ClassMethods
+         def search_commit(query, page: 1, per: 20, options: {})
+           page ||= 1
+
+           fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map { |i| "commit.#{i}" }
+
+           query_hash = {
+             query: {
+               filtered: {
+                 query: {
+                   multi_match: {
+                     fields: fields,
+                     query: "#{query}",
+                     operator: :or
+                   }
+                 }
+               }
+             },
+             aggs: {
+               commitRepositoryFacet: {
+                 terms: {
+                   field: "commit.rid",
+                   all_terms: true,
+                   size: repositories_count
+                 }
+               }
+             },
+             size: per,
+             from: per * (page - 1)
+           }
+
+           if query.blank?
+             query_hash[:query][:filtered][:query] = { match_all: {} }
+             query_hash[:track_scores] = true
+           end
+
+           if options[:repository_id]
+             query_hash[:query][:filtered][:filter] ||= { and: [] }
+             query_hash[:query][:filtered][:filter][:and] << {
+               terms: {
+                 "commit.rid" => [options[:repository_id]].flatten
+               }
+             }
+           end
+
+           if options[:highlight]
+             es_fields = fields.map { |field| field.split('^').first }.inject({}) do |memo, field|
+               memo[field.to_sym] = {}
+               memo
+             end
+
+             query_hash[:highlight] = {
+               pre_tags: ["gitlabelasticsearch→"],
+               post_tags: ["←gitlabelasticsearch"],
+               fields: es_fields
+             }
+           end
+
+           options[:order] = :default if options[:order].blank?
+
+           order = case options[:order].to_sym
+                   when :recently_indexed
+                     { _timestamp: { order: :desc, mode: :min } }
+                   when :last_indexed
+                     { _timestamp: { order: :asc, mode: :min } }
+                   else
+                     {}
+                   end
+
+           query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
+
+           res = self.__elasticsearch__.search(query_hash)
+           {
+             results: res.results,
+             total_count: res.size,
+             repositories: res.response["aggregations"]["commitRepositoryFacet"]["buckets"]
+           }
+         end
+
+         def search_blob(query, type: :all, page: 1, per: 20, options: {})
+           page ||= 1
+
+           query_hash = {
+             query: {
+               filtered: {
+                 query: {
+                   match: {
+                     'blob.content' => {
+                       query: "#{query}",
+                       operator: :and
+                     }
+                   }
+                 }
+               }
+             },
+             aggs: {
+               languageFacet: {
+                 terms: {
+                   field: :language,
+                   all_terms: true,
+                   size: 20
+                 }
+               },
+               blobRepositoryFacet: {
+                 terms: {
+                   field: :rid,
+                   all_terms: true,
+                   size: repositories_count
+                 }
+               }
+             },
+             size: per,
+             from: per * (page - 1)
+           }
+
+           if options[:repository_id]
+             query_hash[:query][:filtered][:filter] ||= { and: [] }
+             query_hash[:query][:filtered][:filter][:and] << {
+               terms: {
+                 "blob.rid" => [options[:repository_id]].flatten
+               }
+             }
+           end
+
+           if options[:language]
+             query_hash[:query][:filtered][:filter] ||= { and: [] }
+             query_hash[:query][:filtered][:filter][:and] << {
+               terms: {
+                 "blob.language" => [options[:language]].flatten
+               }
+             }
+           end
+
+           options[:order] = :default if options[:order].blank?
+
+           order = case options[:order].to_sym
+                   when :recently_indexed
+                     { _timestamp: { order: :desc, mode: :min } }
+                   when :last_indexed
+                     { _timestamp: { order: :asc, mode: :min } }
+                   else
+                     {}
+                   end
+
+           query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
+
+           if options[:highlight]
+             # Highlighter options belong inside the per-field hash, not alongside it
+             query_hash[:highlight] = {
+               pre_tags: ["gitlabelasticsearch→"],
+               post_tags: ["←gitlabelasticsearch"],
+               fields: {
+                 "blob.content" => {
+                   "type" => "fvh",
+                   "boundary_chars" => "\n"
+                 }
+               }
+             }
+           end
+
+           res = self.__elasticsearch__.search(query_hash)
+
+           {
+             results: res.results,
+             total_count: res.size,
+             languages: res.response["aggregations"]["languageFacet"]["buckets"],
+             repositories: res.response["aggregations"]["blobRepositoryFacet"]["buckets"]
+           }
+         end
+
+         def search_file_names(query, page: 1, per: 20, options: {})
+           query_hash = {
+             fields: ['blob.path'],
+             query: {
+               fuzzy: {
+                 "repository.blob.path" => { value: query }
+               }
+             },
+             filter: {
+               term: {
+                 "repository.blob.rid" => [options[:repository_id]].flatten
+               }
+             },
+             size: per,
+             from: per * (page - 1)
+           }
+
+           self.__elasticsearch__.search(query_hash)
+         end
+       end
+     end
+   end
+ end
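
Putting the pieces together: a host application includes the concern, points it at a repository on disk, then indexes and searches. A minimal sketch; the class name, repository path, and query are hypothetical, and an Elasticsearch server is assumed to be reachable on its default port:

    require 'elasticsearch/git'

    class Repository
      include Elasticsearch::Git::Repository
    end

    repo = Repository.new
    repo.repository_for_indexing('/path/to/repo.git')   # opens the Rugged repo and sets repository_id

    repo.index_commits   # index every commit up to HEAD
    repo.index_blobs     # index text-like blobs under 1 MB

    results = repo.search('def initialize', type: :blob)
    results[:blobs][:results].each do |hit|
      puts hit._source.blob.path
    end
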