elasticsearch-git 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/elasticsearch-git.gemspec +7 -7
- data/lib/elasticsearch/git/encoder_helper.rb +43 -0
- data/lib/elasticsearch/git/lite_blob.rb +25 -0
- data/lib/elasticsearch/git/model.rb +2 -2
- data/lib/elasticsearch/git/repository.rb +208 -110
- data/lib/elasticsearch/git/version.rb +1 -1
- data/{lib/test → test}/test_helper.rb +0 -0
- metadata +32 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d48d7fd7bd1dc6d71ce6c24024a746cb75d0b60e
|
4
|
+
data.tar.gz: e4297f9c88cecef626495998bbb3838ee8829f61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0260393250e6bb3fdb4a757b363b5b48d5bd3851bbb923b53170ca88a8b40b7f6c7319a4f0ff6c4b14e4fa8ede81c70b3b6eb343617c624f8a9e961e366d039e
|
7
|
+
data.tar.gz: be0c4572531338cd2cfa595ce523b9bd16d65bc4139942319862f77fb00215bd85c63bdda4261f6da2b0142a373b7fb862a8f9426f47cff7a5b91481f29670ce
|
data/README.md
CHANGED
@@ -316,6 +316,10 @@ Project.last.repository.as_indexed_json
|
|
316
316
|
:message=>"first commit\n"}]}
|
317
317
|
```
|
318
318
|
|
319
|
+
## TODO
|
320
|
+
|
321
|
+
* Add Exceptions handlers for indexing (Error connections and timeouts)
|
322
|
+
|
319
323
|
## Contributing
|
320
324
|
|
321
325
|
1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
|
data/elasticsearch-git.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Andrey Kumanyaev"]
|
10
10
|
spec.email = ["me@zzet.org"]
|
11
11
|
spec.summary = %q{Elasticsearch integrations for git repositories.}
|
12
|
-
spec.description = %q{Elasticsearch integrations for git repositories.}
|
12
|
+
spec.description = %q{Elasticsearch integrations for indexing git repositories.}
|
13
13
|
spec.homepage = "https://github.com/zzet/elasticsearch-git"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -19,10 +19,10 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_runtime_dependency 'elasticsearch-model'
|
22
|
-
spec.add_runtime_dependency 'elasticsearch-api'
|
23
|
-
spec.add_runtime_dependency 'rugged'
|
24
|
-
spec.add_runtime_dependency 'charlock_holmes'
|
25
|
-
spec.add_runtime_dependency 'gitlab-linguist'
|
26
|
-
spec.add_runtime_dependency 'activemodel'
|
27
|
-
spec.add_runtime_dependency 'activesupport'
|
22
|
+
spec.add_runtime_dependency 'elasticsearch-api', '> 0.4.0'
|
23
|
+
spec.add_runtime_dependency 'rugged', '~> 0.19.0'
|
24
|
+
spec.add_runtime_dependency 'charlock_holmes', '~> 0.6.9'
|
25
|
+
spec.add_runtime_dependency 'gitlab-linguist', '> 2.9.0'
|
26
|
+
spec.add_runtime_dependency 'activemodel', '~> 4.0.0'
|
27
|
+
spec.add_runtime_dependency 'activesupport', '~> 4.0.0'
|
28
28
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'charlock_holmes'
|
3
|
+
|
4
|
+
module Elasticsearch
|
5
|
+
module Git
|
6
|
+
module EncoderHelper
|
7
|
+
extend ActiveSupport::Concern
|
8
|
+
|
9
|
+
included do
|
10
|
+
def encode!(message)
|
11
|
+
return nil unless message.respond_to? :force_encoding
|
12
|
+
|
13
|
+
# if message is utf-8 encoding, just return it
|
14
|
+
message.force_encoding("UTF-8")
|
15
|
+
return message if message.valid_encoding?
|
16
|
+
|
17
|
+
# return message if message type is binary
|
18
|
+
detect = CharlockHolmes::EncodingDetector.detect(message)
|
19
|
+
return message.force_encoding("BINARY") if detect && detect[:type] == :binary
|
20
|
+
|
21
|
+
# encoding message to detect encoding
|
22
|
+
if detect && detect[:encoding]
|
23
|
+
message.force_encoding(detect[:encoding])
|
24
|
+
end
|
25
|
+
|
26
|
+
# encode and clean the bad chars
|
27
|
+
message.replace clean(message)
|
28
|
+
rescue
|
29
|
+
encoding = detect ? detect[:encoding] : "unknown"
|
30
|
+
"--broken encoding: #{encoding}"
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def clean(message)
|
36
|
+
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
|
37
|
+
.encode("UTF-8")
|
38
|
+
.gsub("\0".encode("UTF-8"), "")
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'linguist'
|
2
|
+
require 'elasticsearch/git/encoder_helper'
|
3
|
+
|
4
|
+
module Elasticsearch
|
5
|
+
module Git
|
6
|
+
class LiteBlob
|
7
|
+
include Linguist::BlobHelper
|
8
|
+
include Elasticsearch::Git::EncoderHelper
|
9
|
+
|
10
|
+
attr_accessor :id, :name, :path, :data, :size, :mode, :commit_id
|
11
|
+
|
12
|
+
def initialize(repo, raw_blob_hash)
|
13
|
+
@id = raw_blob_hash[:oid]
|
14
|
+
|
15
|
+
blob = repo.lookup(@id)
|
16
|
+
|
17
|
+
@mode = '%06o' % raw_blob_hash[:filemode]
|
18
|
+
@size = blob.size
|
19
|
+
@path = encode!(raw_blob_hash[:path])
|
20
|
+
@name = @path.split('/').last
|
21
|
+
@data = encode!(blob.content)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -42,7 +42,7 @@ module Elasticsearch
|
|
42
42
|
},
|
43
43
|
tokenizer: {
|
44
44
|
sha_tokenizer: {
|
45
|
-
type: "
|
45
|
+
type: "edgeNGram",
|
46
46
|
min_gram: 8,
|
47
47
|
max_gram: 40,
|
48
48
|
token_chars: %w(letter digit)
|
@@ -61,7 +61,7 @@ module Elasticsearch
|
|
61
61
|
max_gram: 20
|
62
62
|
},
|
63
63
|
sha_ngrams: {
|
64
|
-
type: "
|
64
|
+
type: "edgeNGram",
|
65
65
|
min_gram: 8,
|
66
66
|
max_gram: 40
|
67
67
|
}
|
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'active_support/concern'
|
2
2
|
require 'active_model'
|
3
3
|
require 'elasticsearch'
|
4
|
-
require 'elasticsearch/model'
|
4
|
+
require 'elasticsearch/git/model'
|
5
|
+
require 'elasticsearch/git/encoder_helper'
|
6
|
+
require 'elasticsearch/git/lite_blob'
|
5
7
|
require 'rugged'
|
6
|
-
require 'linguist'
|
7
8
|
|
8
9
|
module Elasticsearch
|
9
10
|
module Git
|
@@ -12,30 +13,37 @@ module Elasticsearch
|
|
12
13
|
|
13
14
|
included do
|
14
15
|
include Elasticsearch::Git::Model
|
16
|
+
include Elasticsearch::Git::EncoderHelper
|
15
17
|
|
16
|
-
mapping do
|
18
|
+
mapping _timestamp: { enabled: true } do
|
17
19
|
indexes :blob do
|
18
20
|
indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
|
19
21
|
indexes :rid, type: :string, index: :not_analyzed
|
20
|
-
indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :
|
21
|
-
indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :
|
22
|
-
indexes :
|
22
|
+
indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
23
|
+
indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
24
|
+
indexes :path, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
|
25
|
+
indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
26
|
+
indexes :language, type: :string, index: :not_analyzed
|
23
27
|
end
|
28
|
+
|
24
29
|
indexes :commit do
|
25
30
|
indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
|
26
31
|
indexes :rid, type: :string, index: :not_analyzed
|
27
|
-
indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :
|
32
|
+
indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
33
|
+
|
28
34
|
indexes :author do
|
29
|
-
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :
|
30
|
-
indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :
|
35
|
+
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
36
|
+
indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
31
37
|
indexes :time, type: :date
|
32
38
|
end
|
39
|
+
|
33
40
|
indexes :commiter do
|
34
|
-
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :
|
35
|
-
indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :
|
41
|
+
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
42
|
+
indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
36
43
|
indexes :time, type: :date
|
37
44
|
end
|
38
|
-
|
45
|
+
|
46
|
+
indexes :message, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
|
39
47
|
end
|
40
48
|
end
|
41
49
|
|
@@ -79,7 +87,8 @@ module Elasticsearch
|
|
79
87
|
end
|
80
88
|
|
81
89
|
diff = repository_for_indexing.diff(from_rev, to_rev)
|
82
|
-
|
90
|
+
|
91
|
+
diff.deltas.reverse.each_with_index do |delta, step|
|
83
92
|
if delta.status == :deleted
|
84
93
|
b = LiteBlob.new(repository_for_indexing, delta.old_file)
|
85
94
|
delete_from_index_blob(b)
|
@@ -87,14 +96,20 @@ module Elasticsearch
|
|
87
96
|
b = LiteBlob.new(repository_for_indexing, delta.new_file)
|
88
97
|
index_blob(b, target_sha)
|
89
98
|
end
|
99
|
+
|
100
|
+
# Run GC every 100 blobs
|
101
|
+
ObjectSpace.garbage_collect if step % 100 == 0
|
90
102
|
end
|
91
103
|
else
|
92
104
|
if repository_for_indexing.bare?
|
93
105
|
recurse_blobs_index(repository_for_indexing.lookup(target_sha).tree, target_sha)
|
94
106
|
else
|
95
|
-
repository_for_indexing.index.
|
107
|
+
repository_for_indexing.index.each_with_index do |blob, step|
|
96
108
|
b = LiteBlob.new(repository_for_indexing, blob)
|
97
109
|
index_blob(b, target_sha)
|
110
|
+
|
111
|
+
# Run GC every 100 blobs
|
112
|
+
ObjectSpace.garbage_collect if step % 100 == 0
|
98
113
|
end
|
99
114
|
end
|
100
115
|
end
|
@@ -108,29 +123,51 @@ module Elasticsearch
|
|
108
123
|
index_blob(b, target_sha)
|
109
124
|
end
|
110
125
|
|
126
|
+
# Run GC every recurse step
|
127
|
+
ObjectSpace.garbage_collect
|
128
|
+
|
111
129
|
tree.each_tree do |nested_tree|
|
112
130
|
recurse_blobs_index(repository_for_indexing.lookup(nested_tree[:oid]), target_sha, "#{path}#{nested_tree[:name]}/")
|
113
131
|
end
|
114
132
|
end
|
115
133
|
|
116
134
|
def index_blob(blob, target_sha)
|
117
|
-
if blob
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
135
|
+
if can_index_blob?(blob)
|
136
|
+
tries = 0
|
137
|
+
begin
|
138
|
+
client_for_indexing.index \
|
139
|
+
index: "#{self.class.index_name}",
|
140
|
+
type: "repository",
|
141
|
+
id: "#{repository_id}_#{blob.path}",
|
142
|
+
body: {
|
143
|
+
blob: {
|
144
|
+
type: "blob",
|
145
|
+
oid: blob.id,
|
146
|
+
rid: repository_id,
|
147
|
+
content: blob.data,
|
148
|
+
commit_sha: target_sha,
|
149
|
+
path: blob.path,
|
150
|
+
language: blob.language ? blob.language.name : "Text"
|
151
|
+
}
|
129
152
|
}
|
130
|
-
|
153
|
+
rescue Exception => ex
|
154
|
+
# Retry 10 times send request
|
155
|
+
if tries < 10
|
156
|
+
tries += 1
|
157
|
+
sleep tries * 10 * rand(10)
|
158
|
+
retry
|
159
|
+
else
|
160
|
+
logger.warn "Can't index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
|
161
|
+
end
|
162
|
+
end
|
131
163
|
end
|
132
164
|
end
|
133
165
|
|
166
|
+
# Index text-like files which size less 1.mb
|
167
|
+
def can_index_blob?(blob)
|
168
|
+
blob.text? && (blob.size && blob.size.to_i < 1048576)
|
169
|
+
end
|
170
|
+
|
134
171
|
def delete_from_index_blob(blob)
|
135
172
|
if blob.text?
|
136
173
|
begin
|
@@ -140,6 +177,8 @@ module Elasticsearch
|
|
140
177
|
id: "#{repository_id}_#{blob.path}"
|
141
178
|
rescue Elasticsearch::Transport::Transport::Errors::NotFound
|
142
179
|
return true
|
180
|
+
rescue Exception => ex
|
181
|
+
logger.warn "Error with remove file from index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
|
143
182
|
end
|
144
183
|
end
|
145
184
|
end
|
@@ -167,52 +206,74 @@ module Elasticsearch
|
|
167
206
|
#
|
168
207
|
# For search from commits use type 'commit'
|
169
208
|
def index_commits(from_rev: nil, to_rev: nil)
|
170
|
-
|
209
|
+
to_rev = repository_for_indexing.head.target unless to_rev.present?
|
210
|
+
|
211
|
+
if to_rev != "0000000000000000000000000000000000000000"
|
212
|
+
# If to_rev correct
|
213
|
+
begin
|
214
|
+
raise unless repository_for_indexing.lookup(to_rev).type == :commit
|
215
|
+
rescue
|
216
|
+
raise ArgumentError, "'to_rev': '#{to_rev}' is a incorrect commit sha."
|
217
|
+
end
|
218
|
+
|
171
219
|
begin
|
172
220
|
if from_rev.present? && from_rev != "0000000000000000000000000000000000000000"
|
173
221
|
raise unless repository_for_indexing.lookup(from_rev).type == :commit
|
174
222
|
end
|
175
|
-
if to_rev != "0000000000000000000000000000000000000000"
|
176
|
-
raise unless repository_for_indexing.lookup(to_rev).type == :commit
|
177
|
-
end
|
178
223
|
rescue
|
179
224
|
raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha."
|
180
225
|
end
|
181
226
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
227
|
+
# If pushed new branch no need reindex all repository
|
228
|
+
# Find merge_base and reindex diff
|
229
|
+
if from_rev == "0000000000000000000000000000000000000000" && to_rev != repository_for_indexing.head.target
|
230
|
+
from_rev = repository_for_indexing.merge_base(to_rev, repository_for_indexing.head.target)
|
231
|
+
end
|
187
232
|
|
188
|
-
walker
|
189
|
-
|
233
|
+
walker = Rugged::Walker.new(repository_for_indexing)
|
234
|
+
walker.push(to_rev)
|
235
|
+
|
236
|
+
if from_rev.present? && from_rev != "0000000000000000000000000000000000000000"
|
237
|
+
walker.hide(from_rev)
|
190
238
|
end
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
239
|
+
|
240
|
+
commits = walker.map { |c| c.oid }
|
241
|
+
walker.reset
|
242
|
+
|
243
|
+
commits.each_with_index do |commit, step|
|
244
|
+
index_commit(repository_for_indexing.lookup(commit))
|
245
|
+
ObjectSpace.garbage_collect if step % 100 == 0
|
197
246
|
end
|
198
247
|
end
|
199
248
|
end
|
200
249
|
|
201
250
|
def index_commit(commit)
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
251
|
+
tries = 0
|
252
|
+
begin
|
253
|
+
client_for_indexing.index \
|
254
|
+
index: "#{self.class.index_name}",
|
255
|
+
type: "repository",
|
256
|
+
id: "#{repository_id}_#{commit.oid}",
|
257
|
+
body: {
|
258
|
+
commit: {
|
259
|
+
type: "commit",
|
260
|
+
rid: repository_id,
|
261
|
+
sha: commit.oid,
|
262
|
+
author: commit.author,
|
263
|
+
committer: commit.committer,
|
264
|
+
message: encode!(commit.message)
|
265
|
+
}
|
214
266
|
}
|
215
|
-
|
267
|
+
rescue Exception => ex
|
268
|
+
# Retry 10 times send request
|
269
|
+
if tries < 10
|
270
|
+
tries += 1
|
271
|
+
sleep tries * 10 * rand(10)
|
272
|
+
retry
|
273
|
+
else
|
274
|
+
logger.warn "Can't index #{repository_id}_#{commit.oid}. Reason: #{ex.message}"
|
275
|
+
end
|
276
|
+
end
|
216
277
|
end
|
217
278
|
|
218
279
|
# Representation of repository as indexed json
|
@@ -235,7 +296,7 @@ module Elasticsearch
|
|
235
296
|
result.push(recurse_blobs_index_hash(tree))
|
236
297
|
else
|
237
298
|
repository_for_indexing.index.each do |blob|
|
238
|
-
b =
|
299
|
+
b = LiteBlob.new(repository_for_indexing, blob)
|
239
300
|
result.push(
|
240
301
|
{
|
241
302
|
type: 'blob',
|
@@ -290,7 +351,7 @@ module Elasticsearch
|
|
290
351
|
sha: obj.oid,
|
291
352
|
author: obj.author,
|
292
353
|
committer: obj.committer,
|
293
|
-
message: obj.message
|
354
|
+
message: encode!(obj.message)
|
294
355
|
}
|
295
356
|
)
|
296
357
|
end
|
@@ -326,18 +387,18 @@ module Elasticsearch
|
|
326
387
|
end
|
327
388
|
|
328
389
|
def repository_for_indexing(repo_path = "")
|
390
|
+
return @rugged_repo_indexer if defined? @rugged_repo_indexer
|
391
|
+
|
329
392
|
@path_to_repo ||= repo_path
|
330
393
|
set_repository_id
|
331
|
-
Rugged::Repository.new(@path_to_repo)
|
394
|
+
@rugged_repo_indexer = Rugged::Repository.new(@path_to_repo)
|
332
395
|
end
|
333
396
|
|
334
397
|
def client_for_indexing
|
335
398
|
@client_for_indexing ||= Elasticsearch::Client.new log: true
|
336
399
|
end
|
337
|
-
end
|
338
400
|
|
339
|
-
|
340
|
-
def search(query, type: :all, page: 1, per: 20, options: {})
|
401
|
+
def self.search(query, type: :all, page: 1, per: 20, options: {})
|
341
402
|
results = { blobs: [], commits: []}
|
342
403
|
case type.to_sym
|
343
404
|
when :all
|
@@ -352,6 +413,12 @@ module Elasticsearch
|
|
352
413
|
results
|
353
414
|
end
|
354
415
|
|
416
|
+
def logger
|
417
|
+
@logger ||= Logger.new(STDOUT)
|
418
|
+
end
|
419
|
+
end
|
420
|
+
|
421
|
+
module ClassMethods
|
355
422
|
def search_commit(query, page: 1, per: 20, options: {})
|
356
423
|
page ||= 1
|
357
424
|
|
@@ -364,11 +431,19 @@ module Elasticsearch
|
|
364
431
|
multi_match: {
|
365
432
|
fields: fields,
|
366
433
|
query: "#{query}",
|
367
|
-
operator: :
|
434
|
+
operator: :or
|
368
435
|
}
|
369
436
|
},
|
370
437
|
},
|
371
438
|
},
|
439
|
+
facets: {
|
440
|
+
commitRepositoryFaset: {
|
441
|
+
terms: {
|
442
|
+
field: "commit.rid",
|
443
|
+
all_term: true
|
444
|
+
}
|
445
|
+
}
|
446
|
+
},
|
372
447
|
size: per,
|
373
448
|
from: per * (page - 1)
|
374
449
|
}
|
@@ -387,11 +462,28 @@ module Elasticsearch
|
|
387
462
|
}
|
388
463
|
end
|
389
464
|
|
465
|
+
options[:order] = :default if options[:order].blank?
|
466
|
+
order = case options[:order].to_sym
|
467
|
+
when :recently_indexed
|
468
|
+
{ _timestamp: { order: :desc, mode: :min } }
|
469
|
+
when :last_indexed
|
470
|
+
{ _timestamp: { order: :asc, mode: :min } }
|
471
|
+
else
|
472
|
+
{}
|
473
|
+
end
|
474
|
+
|
475
|
+
query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
|
476
|
+
|
390
477
|
if options[:highlight]
|
391
|
-
query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
|
478
|
+
#query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
|
392
479
|
end
|
393
480
|
|
394
|
-
self.__elasticsearch__.search(query_hash)
|
481
|
+
res = self.__elasticsearch__.search(query_hash)
|
482
|
+
{
|
483
|
+
results: res.results,
|
484
|
+
total_count: res.total_count,
|
485
|
+
repositories: res.response["facets"]["commitRepositoryFaset"]["terms"]
|
486
|
+
}
|
395
487
|
end
|
396
488
|
|
397
489
|
def search_blob(query, type: :all, page: 1, per: 20, options: {})
|
@@ -404,12 +496,26 @@ module Elasticsearch
|
|
404
496
|
match: {
|
405
497
|
'blob.content' => {
|
406
498
|
query: "#{query}",
|
407
|
-
operator: :
|
499
|
+
operator: :or
|
408
500
|
}
|
409
501
|
}
|
410
502
|
}
|
411
503
|
}
|
412
504
|
},
|
505
|
+
facets: {
|
506
|
+
languageFacet: {
|
507
|
+
terms: {
|
508
|
+
field: :language,
|
509
|
+
all_term: true
|
510
|
+
}
|
511
|
+
},
|
512
|
+
blobRepositoryFaset: {
|
513
|
+
terms: {
|
514
|
+
field: :rid,
|
515
|
+
all_term: true
|
516
|
+
}
|
517
|
+
}
|
518
|
+
},
|
413
519
|
size: per,
|
414
520
|
from: per * (page - 1)
|
415
521
|
}
|
@@ -423,56 +529,48 @@ module Elasticsearch
|
|
423
529
|
}
|
424
530
|
end
|
425
531
|
|
426
|
-
if options[:
|
427
|
-
query_hash[:
|
532
|
+
if options[:language]
|
533
|
+
query_hash[:query][:filtered][:filter] ||= { and: [] }
|
534
|
+
query_hash[:query][:filtered][:filter][:and] << {
|
535
|
+
terms: {
|
536
|
+
"blob.language" => [options[:language]].flatten
|
537
|
+
}
|
538
|
+
}
|
428
539
|
end
|
429
540
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
541
|
+
options[:order] = :default if options[:order].blank?
|
542
|
+
order = case options[:order].to_sym
|
543
|
+
when :recently_indexed
|
544
|
+
{ _timestamp: { order: :desc, mode: :min } }
|
545
|
+
when :last_indexed
|
546
|
+
{ _timestamp: { order: :asc, mode: :min } }
|
547
|
+
else
|
548
|
+
{}
|
549
|
+
end
|
437
550
|
|
438
|
-
|
551
|
+
query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
|
439
552
|
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
message.force_encoding("UTF-8")
|
452
|
-
return message if message.valid_encoding?
|
553
|
+
if options[:highlight]
|
554
|
+
query_hash[:highlight] = {
|
555
|
+
pre_tags: [""],
|
556
|
+
post_tags: [""],
|
557
|
+
fields: {
|
558
|
+
"blob.content" => {},
|
559
|
+
"type" => "fvh",
|
560
|
+
"boundary_chars" => "\n"
|
561
|
+
}
|
562
|
+
}
|
563
|
+
end
|
453
564
|
|
454
|
-
|
455
|
-
detect = CharlockHolmes::EncodingDetector.detect(message)
|
456
|
-
return message.force_encoding("BINARY") if detect && detect[:type] == :binary
|
565
|
+
res = self.__elasticsearch__.search(query_hash)
|
457
566
|
|
458
|
-
|
459
|
-
|
460
|
-
|
567
|
+
{
|
568
|
+
results: res.results,
|
569
|
+
total_count: res.total_count,
|
570
|
+
languages: res.response["facets"]["languageFacet"]["terms"],
|
571
|
+
repositories: res.response["facets"]["blobRepositoryFaset"]["terms"]
|
572
|
+
}
|
461
573
|
end
|
462
|
-
|
463
|
-
# encode and clean the bad chars
|
464
|
-
message.replace clean(message)
|
465
|
-
rescue
|
466
|
-
encoding = detect ? detect[:encoding] : "unknown"
|
467
|
-
"--broken encoding: #{encoding}"
|
468
|
-
end
|
469
|
-
|
470
|
-
private
|
471
|
-
|
472
|
-
def clean(message)
|
473
|
-
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
|
474
|
-
.encode("UTF-8")
|
475
|
-
.gsub("\0".encode("UTF-8"), "")
|
476
574
|
end
|
477
575
|
end
|
478
576
|
end
|
File without changes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch-git
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey Kumanyaev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch-model
|
@@ -28,87 +28,87 @@ dependencies:
|
|
28
28
|
name: elasticsearch-api
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - '
|
31
|
+
- - '>'
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 0.4.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - '
|
38
|
+
- - '>'
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 0.4.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rugged
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ~>
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 0.19.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 0.19.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: charlock_holmes
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 0.6.9
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ~>
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 0.6.9
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: gitlab-linguist
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - '
|
73
|
+
- - '>'
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 2.9.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - '
|
80
|
+
- - '>'
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: 2.9.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: activemodel
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ~>
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
89
|
+
version: 4.0.0
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ~>
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
96
|
+
version: 4.0.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: activesupport
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ~>
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
103
|
+
version: 4.0.0
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ~>
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
111
|
-
description: Elasticsearch integrations for git repositories.
|
110
|
+
version: 4.0.0
|
111
|
+
description: Elasticsearch integrations for indexing git repositories.
|
112
112
|
email:
|
113
113
|
- me@zzet.org
|
114
114
|
executables: []
|
@@ -122,10 +122,12 @@ files:
|
|
122
122
|
- Rakefile
|
123
123
|
- elasticsearch-git.gemspec
|
124
124
|
- lib/elasticsearch/git.rb
|
125
|
+
- lib/elasticsearch/git/encoder_helper.rb
|
126
|
+
- lib/elasticsearch/git/lite_blob.rb
|
125
127
|
- lib/elasticsearch/git/model.rb
|
126
128
|
- lib/elasticsearch/git/repository.rb
|
127
129
|
- lib/elasticsearch/git/version.rb
|
128
|
-
-
|
130
|
+
- test/test_helper.rb
|
129
131
|
homepage: https://github.com/zzet/elasticsearch-git
|
130
132
|
licenses:
|
131
133
|
- MIT
|
@@ -150,4 +152,5 @@ rubygems_version: 2.0.3
|
|
150
152
|
signing_key:
|
151
153
|
specification_version: 4
|
152
154
|
summary: Elasticsearch integrations for git repositories.
|
153
|
-
test_files:
|
155
|
+
test_files:
|
156
|
+
- test/test_helper.rb
|