elasticsearch-git 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/elasticsearch-git.gemspec +7 -7
- data/lib/elasticsearch/git/encoder_helper.rb +43 -0
- data/lib/elasticsearch/git/lite_blob.rb +25 -0
- data/lib/elasticsearch/git/model.rb +2 -2
- data/lib/elasticsearch/git/repository.rb +208 -110
- data/lib/elasticsearch/git/version.rb +1 -1
- data/{lib/test → test}/test_helper.rb +0 -0
- metadata +32 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d48d7fd7bd1dc6d71ce6c24024a746cb75d0b60e
|
4
|
+
data.tar.gz: e4297f9c88cecef626495998bbb3838ee8829f61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0260393250e6bb3fdb4a757b363b5b48d5bd3851bbb923b53170ca88a8b40b7f6c7319a4f0ff6c4b14e4fa8ede81c70b3b6eb343617c624f8a9e961e366d039e
|
7
|
+
data.tar.gz: be0c4572531338cd2cfa595ce523b9bd16d65bc4139942319862f77fb00215bd85c63bdda4261f6da2b0142a373b7fb862a8f9426f47cff7a5b91481f29670ce
|
data/README.md
CHANGED
@@ -316,6 +316,10 @@ Project.last.repository.as_indexed_json
|
|
316
316
|
:message=>"first commit\n"}]}
|
317
317
|
```
|
318
318
|
|
319
|
+
## TODO
|
320
|
+
|
321
|
+
* Add exception handlers for indexing (connection errors and timeouts)
|
322
|
+
|
319
323
|
## Contributing
|
320
324
|
|
321
325
|
1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
|
data/elasticsearch-git.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Andrey Kumanyaev"]
|
10
10
|
spec.email = ["me@zzet.org"]
|
11
11
|
spec.summary = %q{Elasticsearch integrations for git repositories.}
|
12
|
-
spec.description = %q{Elasticsearch integrations for git repositories.}
|
12
|
+
spec.description = %q{Elasticsearch integrations for indexing git repositories.}
|
13
13
|
spec.homepage = "https://github.com/zzet/elasticsearch-git"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -19,10 +19,10 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_runtime_dependency 'elasticsearch-model'
|
22
|
-
spec.add_runtime_dependency 'elasticsearch-api'
|
23
|
-
spec.add_runtime_dependency 'rugged'
|
24
|
-
spec.add_runtime_dependency 'charlock_holmes'
|
25
|
-
spec.add_runtime_dependency 'gitlab-linguist'
|
26
|
-
spec.add_runtime_dependency 'activemodel'
|
27
|
-
spec.add_runtime_dependency 'activesupport'
|
22
|
+
spec.add_runtime_dependency 'elasticsearch-api', '> 0.4.0'
|
23
|
+
spec.add_runtime_dependency 'rugged', '~> 0.19.0'
|
24
|
+
spec.add_runtime_dependency 'charlock_holmes', '~> 0.6.9'
|
25
|
+
spec.add_runtime_dependency 'gitlab-linguist', '> 2.9.0'
|
26
|
+
spec.add_runtime_dependency 'activemodel', '~> 4.0.0'
|
27
|
+
spec.add_runtime_dependency 'activesupport', '~> 4.0.0'
|
28
28
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'charlock_holmes'
|
3
|
+
|
4
|
+
module Elasticsearch
  module Git
    # Mixin that coerces arbitrary strings (commit messages, blob contents,
    # paths) to valid UTF-8 before they are handed to the indexer.
    #
    # Including this module defines #encode! (public) and #clean (private)
    # directly on the including class.
    module EncoderHelper
      extend ActiveSupport::Concern

      included do
        # Coerce +message+ to valid UTF-8.
        #
        # @param message [#force_encoding, Object] the text to normalise
        # @return [String, nil]
        #   * nil when +message+ does not respond to +force_encoding+;
        #   * +message+ itself, tagged UTF-8, when its bytes are valid UTF-8;
        #   * +message+ tagged BINARY when the detector reports binary data;
        #   * +message+ transcoded to UTF-8 with bad characters and NUL bytes
        #     removed otherwise;
        #   * the literal text "--broken encoding: <name>" when anything in
        #     the process raises a StandardError.
        #
        # The string is modified in place unless it is frozen; a frozen
        # string cannot be re-tagged, so an unfrozen copy is processed and
        # returned instead of reporting a bogus "broken encoding".
        def encode!(message)
          return nil unless message.respond_to? :force_encoding

          message = message.dup if message.frozen?

          # if message is utf-8 encoding, just return it
          message.force_encoding("UTF-8")
          return message if message.valid_encoding?

          # return message if message type is binary
          detect = CharlockHolmes::EncodingDetector.detect(message)
          return message.force_encoding("BINARY") if detect && detect[:type] == :binary

          # tag the bytes with the detected encoding so they can be transcoded
          if detect && detect[:encoding]
            message.force_encoding(detect[:encoding])
          end

          # encode and clean the bad chars
          message.replace clean(message)
        rescue
          # +detect+ is still nil when the failure happened before detection.
          encoding = detect ? detect[:encoding] : "unknown"
          "--broken encoding: #{encoding}"
        end

        private

        # Round-trip through UTF-16BE to drop undefined/invalid characters,
        # then strip NUL characters from the UTF-8 result.
        def clean(message)
          message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
                 .encode("UTF-8")
                 .gsub("\0".encode("UTF-8"), "")
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'linguist'
|
2
|
+
require 'elasticsearch/git/encoder_helper'
|
3
|
+
|
4
|
+
module Elasticsearch
  module Git
    # Lightweight in-memory view of a git blob: identity, file mode, size,
    # path, file name and content, with path and content passed through
    # EncoderHelper#encode!.
    class LiteBlob
      include Linguist::BlobHelper
      include Elasticsearch::Git::EncoderHelper

      attr_accessor :id, :name, :path, :data, :size, :mode, :commit_id

      # @param repo [#lookup] object used to resolve the blob by its oid
      # @param raw_blob_hash [Hash] entry providing :oid, :filemode and :path
      def initialize(repo, raw_blob_hash)
        @id = raw_blob_hash[:oid]
        git_object = repo.lookup(@id)

        # six-digit, zero-padded octal file mode
        @mode = '%06o' % raw_blob_hash[:filemode]
        @size = git_object.size

        @path = encode!(raw_blob_hash[:path])
        path_segments = @path.split('/')
        @name = path_segments.last

        @data = encode!(git_object.content)
      end
    end
  end
end
|
@@ -42,7 +42,7 @@ module Elasticsearch
|
|
42
42
|
},
|
43
43
|
tokenizer: {
|
44
44
|
sha_tokenizer: {
|
45
|
-
type: "
|
45
|
+
type: "edgeNGram",
|
46
46
|
min_gram: 8,
|
47
47
|
max_gram: 40,
|
48
48
|
token_chars: %w(letter digit)
|
@@ -61,7 +61,7 @@ module Elasticsearch
|
|
61
61
|
max_gram: 20
|
62
62
|
},
|
63
63
|
sha_ngrams: {
|
64
|
-
type: "
|
64
|
+
type: "edgeNGram",
|
65
65
|
min_gram: 8,
|
66
66
|
max_gram: 40
|
67
67
|
}
|
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'active_support/concern'
|
2
2
|
require 'active_model'
|
3
3
|
require 'elasticsearch'
|
4
|
-
require 'elasticsearch/model'
|
4
|
+
require 'elasticsearch/git/model'
|
5
|
+
require 'elasticsearch/git/encoder_helper'
|
6
|
+
require 'elasticsearch/git/lite_blob'
|
5
7
|
require 'rugged'
|
6
|
-
require 'linguist'
|
7
8
|
|
8
9
|
module Elasticsearch
|
9
10
|
module Git
|
@@ -12,30 +13,37 @@ module Elasticsearch
|
|
12
13
|
|
13
14
|
included do
|
14
15
|
include Elasticsearch::Git::Model
|
16
|
+
include Elasticsearch::Git::EncoderHelper
|
15
17
|
|
16
|
-
mapping do
|
18
|
+
# Field mapping for indexed documents, with the _timestamp field enabled.
# Two nested objects are declared: :blob (file contents) and :commit.
mapping _timestamp: { enabled: true } do
  indexes :blob do
    indexes :id,          type: :string, index_options: 'offsets', search_analyzer: :human_analyzer,  index_analyzer: :human_analyzer
    indexes :rid,         type: :string, index: :not_analyzed
    indexes :oid,         type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer
    indexes :commit_sha,  type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer
    indexes :path,        type: :string, index_options: 'offsets', search_analyzer: :human_analyzer,  index_analyzer: :human_analyzer
    indexes :content,     type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer
    indexes :language,    type: :string, index: :not_analyzed
  end

  indexes :commit do
    indexes :id,          type: :string, index_options: 'offsets', search_analyzer: :human_analyzer,  index_analyzer: :human_analyzer
    indexes :rid,         type: :string, index: :not_analyzed
    indexes :sha,         type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer

    indexes :author do
      indexes :name,      type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer
      indexes :email,     type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer
      indexes :time,      type: :date
    end

    # Spelled `committer` (double "t") so it matches the key the indexing
    # code writes into commit documents; the earlier `commiter` spelling
    # declared a field that no document ever populated.
    # NOTE(review): an index created with the old mapping presumably has to
    # be recreated for this to take effect - confirm before deploying.
    indexes :committer do
      indexes :name,      type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer
      indexes :email,     type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer
      indexes :time,      type: :date
    end

    indexes :message,     type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,   index_analyzer: :code_analyzer
  end
end
|
41
49
|
|
@@ -79,7 +87,8 @@ module Elasticsearch
|
|
79
87
|
end
|
80
88
|
|
81
89
|
diff = repository_for_indexing.diff(from_rev, to_rev)
|
82
|
-
|
90
|
+
|
91
|
+
diff.deltas.reverse.each_with_index do |delta, step|
|
83
92
|
if delta.status == :deleted
|
84
93
|
b = LiteBlob.new(repository_for_indexing, delta.old_file)
|
85
94
|
delete_from_index_blob(b)
|
@@ -87,14 +96,20 @@ module Elasticsearch
|
|
87
96
|
b = LiteBlob.new(repository_for_indexing, delta.new_file)
|
88
97
|
index_blob(b, target_sha)
|
89
98
|
end
|
99
|
+
|
100
|
+
# Run GC every 100 blobs
|
101
|
+
ObjectSpace.garbage_collect if step % 100 == 0
|
90
102
|
end
|
91
103
|
else
|
92
104
|
if repository_for_indexing.bare?
|
93
105
|
recurse_blobs_index(repository_for_indexing.lookup(target_sha).tree, target_sha)
|
94
106
|
else
|
95
|
-
repository_for_indexing.index.
|
107
|
+
repository_for_indexing.index.each_with_index do |blob, step|
|
96
108
|
b = LiteBlob.new(repository_for_indexing, blob)
|
97
109
|
index_blob(b, target_sha)
|
110
|
+
|
111
|
+
# Run GC every 100 blobs
|
112
|
+
ObjectSpace.garbage_collect if step % 100 == 0
|
98
113
|
end
|
99
114
|
end
|
100
115
|
end
|
@@ -108,29 +123,51 @@ module Elasticsearch
|
|
108
123
|
index_blob(b, target_sha)
|
109
124
|
end
|
110
125
|
|
126
|
+
# Run GC every recurse step
|
127
|
+
ObjectSpace.garbage_collect
|
128
|
+
|
111
129
|
tree.each_tree do |nested_tree|
|
112
130
|
recurse_blobs_index(repository_for_indexing.lookup(nested_tree[:oid]), target_sha, "#{path}#{nested_tree[:name]}/")
|
113
131
|
end
|
114
132
|
end
|
115
133
|
|
116
134
|
# Send one blob to the index, if can_index_blob? accepts it.
#
# The document id is "<repository_id>_<blob.path>" and the body nests the
# blob's oid, repository id, content, +target_sha+, path and language name
# ("Text" when the blob reports no language) under the :blob key.
#
# A failing request is retried up to 10 times with a randomised pause of
# `tries * 10 * rand(10)` seconds (0 to 900 seconds on the last attempt);
# after that a warning is logged and the error is dropped.
#
# Only StandardError is retried. Interrupt, SystemExit and other
# non-standard exceptions propagate immediately so the process can still
# be stopped while indexing.
#
# @param blob [#id, #path, #data, #language] blob to index
# @param target_sha [String] commit sha stored alongside the blob
# @return [Object, nil] the client's response, the logger's return value
#   after the final failure, or nil when the blob is not indexable
def index_blob(blob, target_sha)
  return unless can_index_blob?(blob)

  tries = 0
  begin
    client_for_indexing.index \
      index: "#{self.class.index_name}",
      type: "repository",
      id: "#{repository_id}_#{blob.path}",
      body: {
        blob: {
          type: "blob",
          oid: blob.id,
          rid: repository_id,
          content: blob.data,
          commit_sha: target_sha,
          path: blob.path,
          language: blob.language ? blob.language.name : "Text"
        }
      }
  rescue StandardError => ex
    # Retry 10 times send request
    if tries < 10
      tries += 1
      sleep tries * 10 * rand(10)
      retry
    else
      logger.warn "Can't index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
    end
  end
end
|
133
165
|
|
166
|
+
# Decide whether a blob should be indexed: only text blobs whose size is
# known and strictly below 1 MiB (1_048_576 bytes) qualify.
#
# The size bound is checked first so that +blob.text?+ is never evaluated
# for blobs that are rejected on size alone.
#
# @param blob [#size, #text?] blob under consideration
# @return [Boolean] true when the blob should be indexed
def can_index_blob?(blob)
  size = blob.size
  return false unless size && size.to_i < 1_048_576

  blob.text? ? true : false
end
|
170
|
+
|
134
171
|
def delete_from_index_blob(blob)
|
135
172
|
if blob.text?
|
136
173
|
begin
|
@@ -140,6 +177,8 @@ module Elasticsearch
|
|
140
177
|
id: "#{repository_id}_#{blob.path}"
|
141
178
|
rescue Elasticsearch::Transport::Transport::Errors::NotFound
|
142
179
|
return true
|
180
|
+
rescue Exception => ex
|
181
|
+
logger.warn "Error with remove file from index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
|
143
182
|
end
|
144
183
|
end
|
145
184
|
end
|
@@ -167,52 +206,74 @@ module Elasticsearch
|
|
167
206
|
#
|
168
207
|
# For search from commits use type 'commit'
|
169
208
|
# Index every commit reachable from +to_rev+ and not reachable from
# +from_rev+.
#
# * +to_rev+ defaults to the repository HEAD target when blank.
# * Nothing is done (nil is returned) when +to_rev+ is the all-zero sha.
# * When +from_rev+ is the all-zero sha and +to_rev+ is not HEAD, the merge
#   base of +to_rev+ and HEAD is used as +from_rev+ instead.
#
# @param from_rev [String, nil] lower bound of the walk (excluded)
# @param to_rev [String, nil] starting point of the walk
# @raise [ArgumentError] when either revision does not resolve to a commit
# @return [Array, nil] the oids that were walked, or nil when skipped
def index_commits(from_rev: nil, to_rev: nil)
  null_sha = "0000000000000000000000000000000000000000"

  to_rev = repository_for_indexing.head.target unless to_rev.present?
  return if to_rev == null_sha

  # to_rev must resolve to a commit
  begin
    raise if repository_for_indexing.lookup(to_rev).type != :commit
  rescue
    raise ArgumentError, "'to_rev': '#{to_rev}' is a incorrect commit sha."
  end

  # from_rev, when given and not the null sha, must resolve to a commit too
  begin
    must_check = from_rev.present? && from_rev != null_sha
    raise if must_check && repository_for_indexing.lookup(from_rev).type != :commit
  rescue
    raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha."
  end

  # A newly pushed branch does not need a full reindex:
  # start from its merge base with HEAD instead.
  if from_rev == null_sha && to_rev != repository_for_indexing.head.target
    from_rev = repository_for_indexing.merge_base(to_rev, repository_for_indexing.head.target)
  end

  walker = Rugged::Walker.new(repository_for_indexing)
  walker.push(to_rev)
  walker.hide(from_rev) if from_rev.present? && from_rev != null_sha

  oids = walker.map(&:oid)
  walker.reset

  oids.each_with_index do |oid, position|
    index_commit(repository_for_indexing.lookup(oid))
    # Run GC every 100 commits
    ObjectSpace.garbage_collect if position % 100 == 0
  end
end
|
200
249
|
|
201
250
|
# Send one commit to the index.
#
# The document id is "<repository_id>_<commit.oid>" and the body nests the
# repository id, sha, author, committer and the encode!-normalised message
# under the :commit key.
#
# A failing request is retried up to 10 times with a randomised pause of
# `tries * 10 * rand(10)` seconds (0 to 900 seconds on the last attempt);
# after that a warning is logged and the error is dropped.
#
# Only StandardError is retried. Interrupt, SystemExit and other
# non-standard exceptions propagate immediately so the process can still
# be stopped while indexing.
#
# @param commit [#oid, #author, #committer, #message] commit to index
# @return [Object] the client's response, or the logger's return value
#   after the final failure
def index_commit(commit)
  tries = 0
  begin
    client_for_indexing.index \
      index: "#{self.class.index_name}",
      type: "repository",
      id: "#{repository_id}_#{commit.oid}",
      body: {
        commit: {
          type: "commit",
          rid: repository_id,
          sha: commit.oid,
          author: commit.author,
          committer: commit.committer,
          message: encode!(commit.message)
        }
      }
  rescue StandardError => ex
    # Retry 10 times send request
    if tries < 10
      tries += 1
      sleep tries * 10 * rand(10)
      retry
    else
      logger.warn "Can't index #{repository_id}_#{commit.oid}. Reason: #{ex.message}"
    end
  end
end
|
217
278
|
|
218
279
|
# Representation of repository as indexed json
|
@@ -235,7 +296,7 @@ module Elasticsearch
|
|
235
296
|
result.push(recurse_blobs_index_hash(tree))
|
236
297
|
else
|
237
298
|
repository_for_indexing.index.each do |blob|
|
238
|
-
b =
|
299
|
+
b = LiteBlob.new(repository_for_indexing, blob)
|
239
300
|
result.push(
|
240
301
|
{
|
241
302
|
type: 'blob',
|
@@ -290,7 +351,7 @@ module Elasticsearch
|
|
290
351
|
sha: obj.oid,
|
291
352
|
author: obj.author,
|
292
353
|
committer: obj.committer,
|
293
|
-
message: obj.message
|
354
|
+
message: encode!(obj.message)
|
294
355
|
}
|
295
356
|
)
|
296
357
|
end
|
@@ -326,18 +387,18 @@ module Elasticsearch
|
|
326
387
|
end
|
327
388
|
|
328
389
|
# Return the Rugged repository used for indexing, opening it on first use.
#
# Once @rugged_repo_indexer has been assigned (to any value, nil included)
# it is returned as-is and +repo_path+ is ignored. Otherwise the path is
# remembered in @path_to_repo (unless one is already set), the repository
# id is set via set_repository_id, and the repository is opened and cached.
#
# @param repo_path [String] path to the repository, used on first call only
# @return [Object] the cached repository handle
def repository_for_indexing(repo_path = "")
  if instance_variable_defined?(:@rugged_repo_indexer)
    return @rugged_repo_indexer
  end

  @path_to_repo = repo_path unless @path_to_repo
  set_repository_id

  @rugged_repo_indexer = Rugged::Repository.new(@path_to_repo)
end
|
333
396
|
|
334
397
|
# Return the Elasticsearch client used for indexing, building it (with
# request logging enabled) on first use and reusing it afterwards.
#
# @return [Object] the memoised client
def client_for_indexing
  return @client_for_indexing if @client_for_indexing

  @client_for_indexing = Elasticsearch::Client.new(log: true)
end
|
337
|
-
end
|
338
400
|
|
339
|
-
|
340
|
-
def search(query, type: :all, page: 1, per: 20, options: {})
|
401
|
+
def self.search(query, type: :all, page: 1, per: 20, options: {})
|
341
402
|
results = { blobs: [], commits: []}
|
342
403
|
case type.to_sym
|
343
404
|
when :all
|
@@ -352,6 +413,12 @@ module Elasticsearch
|
|
352
413
|
results
|
353
414
|
end
|
354
415
|
|
416
|
+
# Return the logger used for indexing warnings: a Logger writing to STDOUT,
# created on first use and reused afterwards.
#
# @return [Logger] the memoised logger
def logger
  return @logger if @logger

  @logger = Logger.new(STDOUT)
end
|
419
|
+
end
|
420
|
+
|
421
|
+
module ClassMethods
|
355
422
|
def search_commit(query, page: 1, per: 20, options: {})
|
356
423
|
page ||= 1
|
357
424
|
|
@@ -364,11 +431,19 @@ module Elasticsearch
|
|
364
431
|
multi_match: {
|
365
432
|
fields: fields,
|
366
433
|
query: "#{query}",
|
367
|
-
operator: :
|
434
|
+
operator: :or
|
368
435
|
}
|
369
436
|
},
|
370
437
|
},
|
371
438
|
},
|
439
|
+
facets: {
|
440
|
+
commitRepositoryFaset: {
|
441
|
+
terms: {
|
442
|
+
field: "commit.rid",
|
443
|
+
all_term: true
|
444
|
+
}
|
445
|
+
}
|
446
|
+
},
|
372
447
|
size: per,
|
373
448
|
from: per * (page - 1)
|
374
449
|
}
|
@@ -387,11 +462,28 @@ module Elasticsearch
|
|
387
462
|
}
|
388
463
|
end
|
389
464
|
|
465
|
+
options[:order] = :default if options[:order].blank?
|
466
|
+
order = case options[:order].to_sym
|
467
|
+
when :recently_indexed
|
468
|
+
{ _timestamp: { order: :desc, mode: :min } }
|
469
|
+
when :last_indexed
|
470
|
+
{ _timestamp: { order: :asc, mode: :min } }
|
471
|
+
else
|
472
|
+
{}
|
473
|
+
end
|
474
|
+
|
475
|
+
query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
|
476
|
+
|
390
477
|
if options[:highlight]
|
391
|
-
query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
|
478
|
+
#query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
|
392
479
|
end
|
393
480
|
|
394
|
-
self.__elasticsearch__.search(query_hash)
|
481
|
+
res = self.__elasticsearch__.search(query_hash)
|
482
|
+
{
|
483
|
+
results: res.results,
|
484
|
+
total_count: res.total_count,
|
485
|
+
repositories: res.response["facets"]["commitRepositoryFaset"]["terms"]
|
486
|
+
}
|
395
487
|
end
|
396
488
|
|
397
489
|
def search_blob(query, type: :all, page: 1, per: 20, options: {})
|
@@ -404,12 +496,26 @@ module Elasticsearch
|
|
404
496
|
match: {
|
405
497
|
'blob.content' => {
|
406
498
|
query: "#{query}",
|
407
|
-
operator: :
|
499
|
+
operator: :or
|
408
500
|
}
|
409
501
|
}
|
410
502
|
}
|
411
503
|
}
|
412
504
|
},
|
505
|
+
facets: {
|
506
|
+
languageFacet: {
|
507
|
+
terms: {
|
508
|
+
field: :language,
|
509
|
+
all_term: true
|
510
|
+
}
|
511
|
+
},
|
512
|
+
blobRepositoryFaset: {
|
513
|
+
terms: {
|
514
|
+
field: :rid,
|
515
|
+
all_term: true
|
516
|
+
}
|
517
|
+
}
|
518
|
+
},
|
413
519
|
size: per,
|
414
520
|
from: per * (page - 1)
|
415
521
|
}
|
@@ -423,56 +529,48 @@ module Elasticsearch
|
|
423
529
|
}
|
424
530
|
end
|
425
531
|
|
426
|
-
if options[:
|
427
|
-
query_hash[:
|
532
|
+
if options[:language]
|
533
|
+
query_hash[:query][:filtered][:filter] ||= { and: [] }
|
534
|
+
query_hash[:query][:filtered][:filter][:and] << {
|
535
|
+
terms: {
|
536
|
+
"blob.language" => [options[:language]].flatten
|
537
|
+
}
|
538
|
+
}
|
428
539
|
end
|
429
540
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
541
|
+
options[:order] = :default if options[:order].blank?
|
542
|
+
order = case options[:order].to_sym
|
543
|
+
when :recently_indexed
|
544
|
+
{ _timestamp: { order: :desc, mode: :min } }
|
545
|
+
when :last_indexed
|
546
|
+
{ _timestamp: { order: :asc, mode: :min } }
|
547
|
+
else
|
548
|
+
{}
|
549
|
+
end
|
437
550
|
|
438
|
-
|
551
|
+
query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
|
439
552
|
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
message.force_encoding("UTF-8")
|
452
|
-
return message if message.valid_encoding?
|
553
|
+
if options[:highlight]
|
554
|
+
query_hash[:highlight] = {
|
555
|
+
pre_tags: [""],
|
556
|
+
post_tags: [""],
|
557
|
+
fields: {
|
558
|
+
"blob.content" => {},
|
559
|
+
"type" => "fvh",
|
560
|
+
"boundary_chars" => "\n"
|
561
|
+
}
|
562
|
+
}
|
563
|
+
end
|
453
564
|
|
454
|
-
|
455
|
-
detect = CharlockHolmes::EncodingDetector.detect(message)
|
456
|
-
return message.force_encoding("BINARY") if detect && detect[:type] == :binary
|
565
|
+
res = self.__elasticsearch__.search(query_hash)
|
457
566
|
|
458
|
-
|
459
|
-
|
460
|
-
|
567
|
+
{
|
568
|
+
results: res.results,
|
569
|
+
total_count: res.total_count,
|
570
|
+
languages: res.response["facets"]["languageFacet"]["terms"],
|
571
|
+
repositories: res.response["facets"]["blobRepositoryFaset"]["terms"]
|
572
|
+
}
|
461
573
|
end
|
462
|
-
|
463
|
-
# encode and clean the bad chars
|
464
|
-
message.replace clean(message)
|
465
|
-
rescue
|
466
|
-
encoding = detect ? detect[:encoding] : "unknown"
|
467
|
-
"--broken encoding: #{encoding}"
|
468
|
-
end
|
469
|
-
|
470
|
-
private
|
471
|
-
|
472
|
-
def clean(message)
|
473
|
-
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
|
474
|
-
.encode("UTF-8")
|
475
|
-
.gsub("\0".encode("UTF-8"), "")
|
476
574
|
end
|
477
575
|
end
|
478
576
|
end
|
File without changes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch-git
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey Kumanyaev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch-model
|
@@ -28,87 +28,87 @@ dependencies:
|
|
28
28
|
name: elasticsearch-api
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - '
|
31
|
+
- - '>'
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 0.4.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - '
|
38
|
+
- - '>'
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 0.4.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rugged
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ~>
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 0.19.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 0.19.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: charlock_holmes
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 0.6.9
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ~>
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 0.6.9
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: gitlab-linguist
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - '
|
73
|
+
- - '>'
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 2.9.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - '
|
80
|
+
- - '>'
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: 2.9.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: activemodel
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ~>
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
89
|
+
version: 4.0.0
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ~>
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
96
|
+
version: 4.0.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: activesupport
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ~>
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
103
|
+
version: 4.0.0
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ~>
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
111
|
-
description: Elasticsearch integrations for git repositories.
|
110
|
+
version: 4.0.0
|
111
|
+
description: Elasticsearch integrations for indexing git repositories.
|
112
112
|
email:
|
113
113
|
- me@zzet.org
|
114
114
|
executables: []
|
@@ -122,10 +122,12 @@ files:
|
|
122
122
|
- Rakefile
|
123
123
|
- elasticsearch-git.gemspec
|
124
124
|
- lib/elasticsearch/git.rb
|
125
|
+
- lib/elasticsearch/git/encoder_helper.rb
|
126
|
+
- lib/elasticsearch/git/lite_blob.rb
|
125
127
|
- lib/elasticsearch/git/model.rb
|
126
128
|
- lib/elasticsearch/git/repository.rb
|
127
129
|
- lib/elasticsearch/git/version.rb
|
128
|
-
-
|
130
|
+
- test/test_helper.rb
|
129
131
|
homepage: https://github.com/zzet/elasticsearch-git
|
130
132
|
licenses:
|
131
133
|
- MIT
|
@@ -150,4 +152,5 @@ rubygems_version: 2.0.3
|
|
150
152
|
signing_key:
|
151
153
|
specification_version: 4
|
152
154
|
summary: Elasticsearch integrations for git repositories.
|
153
|
-
test_files:
|
155
|
+
test_files:
|
156
|
+
- test/test_helper.rb
|