elasticsearch-git 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d6dfec96f6e2fbded64a0429c7b083aadf96234b
4
- data.tar.gz: 8eb678cf43ecbb8aeabc1016e00f3b721010f269
3
+ metadata.gz: d48d7fd7bd1dc6d71ce6c24024a746cb75d0b60e
4
+ data.tar.gz: e4297f9c88cecef626495998bbb3838ee8829f61
5
5
  SHA512:
6
- metadata.gz: b88db3a9d3647bcd9c3eae1039e7b2380e51fcbf4b02b242e2bda5acc2cff8c58b8d2a1ad2941fedb586f3cfd318f9c9b386fa9bcdfeaeabdaef616c3c891f38
7
- data.tar.gz: 5b2fa5f90008f048b33627724847ca317e36e766fdbc4e9e185e8ba26c7edc16a4d3981a5e6577c4943cfad45fe3825694728ca9eef8a3927414ef8466f9ca55
6
+ metadata.gz: 0260393250e6bb3fdb4a757b363b5b48d5bd3851bbb923b53170ca88a8b40b7f6c7319a4f0ff6c4b14e4fa8ede81c70b3b6eb343617c624f8a9e961e366d039e
7
+ data.tar.gz: be0c4572531338cd2cfa595ce523b9bd16d65bc4139942319862f77fb00215bd85c63bdda4261f6da2b0142a373b7fb862a8f9426f47cff7a5b91481f29670ce
data/README.md CHANGED
@@ -316,6 +316,10 @@ Project.last.repository.as_indexed_json
316
316
  :message=>"first commit\n"}]}
317
317
  ```
318
318
 
319
+ ## TODO
320
+
321
+ * Add exception handlers for indexing (connection errors and timeouts)
322
+
319
323
  ## Contributing
320
324
 
321
325
  1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Andrey Kumanyaev"]
10
10
  spec.email = ["me@zzet.org"]
11
11
  spec.summary = %q{Elasticsearch integrations for git repositories.}
12
- spec.description = %q{Elasticsearch integrations for git repositories.}
12
+ spec.description = %q{Elasticsearch integrations for indexing git repositories.}
13
13
  spec.homepage = "https://github.com/zzet/elasticsearch-git"
14
14
  spec.license = "MIT"
15
15
 
@@ -19,10 +19,10 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_runtime_dependency 'elasticsearch-model'
22
- spec.add_runtime_dependency 'elasticsearch-api'
23
- spec.add_runtime_dependency 'rugged'
24
- spec.add_runtime_dependency 'charlock_holmes'
25
- spec.add_runtime_dependency 'gitlab-linguist'
26
- spec.add_runtime_dependency 'activemodel'
27
- spec.add_runtime_dependency 'activesupport'
22
+ spec.add_runtime_dependency 'elasticsearch-api', '> 0.4.0'
23
+ spec.add_runtime_dependency 'rugged', '~> 0.19.0'
24
+ spec.add_runtime_dependency 'charlock_holmes', '~> 0.6.9'
25
+ spec.add_runtime_dependency 'gitlab-linguist', '> 2.9.0'
26
+ spec.add_runtime_dependency 'activemodel', '~> 4.0.0'
27
+ spec.add_runtime_dependency 'activesupport', '~> 4.0.0'
28
28
  end
@@ -0,0 +1,43 @@
1
+ require 'active_support/concern'
2
+ require 'charlock_holmes'
3
+
4
+ module Elasticsearch
5
+ module Git
6
+ module EncoderHelper
7
+ extend ActiveSupport::Concern
8
+
9
+ included do
10
+ def encode!(message)
11
+ return nil unless message.respond_to? :force_encoding
12
+
13
+ # if message is utf-8 encoding, just return it
14
+ message.force_encoding("UTF-8")
15
+ return message if message.valid_encoding?
16
+
17
+ # return message if message type is binary
18
+ detect = CharlockHolmes::EncodingDetector.detect(message)
19
+ return message.force_encoding("BINARY") if detect && detect[:type] == :binary
20
+
21
+ # encoding message to detect encoding
22
+ if detect && detect[:encoding]
23
+ message.force_encoding(detect[:encoding])
24
+ end
25
+
26
+ # encode and clean the bad chars
27
+ message.replace clean(message)
28
+ rescue
29
+ encoding = detect ? detect[:encoding] : "unknown"
30
+ "--broken encoding: #{encoding}"
31
+ end
32
+
33
+ private
34
+
35
+ def clean(message)
36
+ message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
37
+ .encode("UTF-8")
38
+ .gsub("\0".encode("UTF-8"), "")
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,25 @@
1
+ require 'linguist'
2
+ require 'elasticsearch/git/encoder_helper'
3
+
4
+ module Elasticsearch
5
+ module Git
6
+ class LiteBlob
7
+ include Linguist::BlobHelper
8
+ include Elasticsearch::Git::EncoderHelper
9
+
10
+ attr_accessor :id, :name, :path, :data, :size, :mode, :commit_id
11
+
12
+ def initialize(repo, raw_blob_hash)
13
+ @id = raw_blob_hash[:oid]
14
+
15
+ blob = repo.lookup(@id)
16
+
17
+ @mode = '%06o' % raw_blob_hash[:filemode]
18
+ @size = blob.size
19
+ @path = encode!(raw_blob_hash[:path])
20
+ @name = @path.split('/').last
21
+ @data = encode!(blob.content)
22
+ end
23
+ end
24
+ end
25
+ end
@@ -42,7 +42,7 @@ module Elasticsearch
42
42
  },
43
43
  tokenizer: {
44
44
  sha_tokenizer: {
45
- type: "NGram",
45
+ type: "edgeNGram",
46
46
  min_gram: 8,
47
47
  max_gram: 40,
48
48
  token_chars: %w(letter digit)
@@ -61,7 +61,7 @@ module Elasticsearch
61
61
  max_gram: 20
62
62
  },
63
63
  sha_ngrams: {
64
- type: "NGram",
64
+ type: "edgeNGram",
65
65
  min_gram: 8,
66
66
  max_gram: 40
67
67
  }
@@ -1,9 +1,10 @@
1
1
  require 'active_support/concern'
2
2
  require 'active_model'
3
3
  require 'elasticsearch'
4
- require 'elasticsearch/model'
4
+ require 'elasticsearch/git/model'
5
+ require 'elasticsearch/git/encoder_helper'
6
+ require 'elasticsearch/git/lite_blob'
5
7
  require 'rugged'
6
- require 'linguist'
7
8
 
8
9
  module Elasticsearch
9
10
  module Git
@@ -12,30 +13,37 @@ module Elasticsearch
12
13
 
13
14
  included do
14
15
  include Elasticsearch::Git::Model
16
+ include Elasticsearch::Git::EncoderHelper
15
17
 
16
- mapping do
18
+ mapping _timestamp: { enabled: true } do
17
19
  indexes :blob do
18
20
  indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
19
21
  indexes :rid, type: :string, index: :not_analyzed
20
- indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
21
- indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
22
- indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
22
+ indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
23
+ indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
24
+ indexes :path, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
25
+ indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
26
+ indexes :language, type: :string, index: :not_analyzed
23
27
  end
28
+
24
29
  indexes :commit do
25
30
  indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
26
31
  indexes :rid, type: :string, index: :not_analyzed
27
- indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
32
+ indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
33
+
28
34
  indexes :author do
29
- indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
30
- indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
35
+ indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
36
+ indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
31
37
  indexes :time, type: :date
32
38
  end
39
+
33
40
  indexes :commiter do
34
- indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
35
- indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
41
+ indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
42
+ indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
36
43
  indexes :time, type: :date
37
44
  end
38
- indexes :message, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
45
+
46
+ indexes :message, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
39
47
  end
40
48
  end
41
49
 
@@ -79,7 +87,8 @@ module Elasticsearch
79
87
  end
80
88
 
81
89
  diff = repository_for_indexing.diff(from_rev, to_rev)
82
- diff.deltas.reverse.each do |delta|
90
+
91
+ diff.deltas.reverse.each_with_index do |delta, step|
83
92
  if delta.status == :deleted
84
93
  b = LiteBlob.new(repository_for_indexing, delta.old_file)
85
94
  delete_from_index_blob(b)
@@ -87,14 +96,20 @@ module Elasticsearch
87
96
  b = LiteBlob.new(repository_for_indexing, delta.new_file)
88
97
  index_blob(b, target_sha)
89
98
  end
99
+
100
+ # Run GC every 100 blobs
101
+ ObjectSpace.garbage_collect if step % 100 == 0
90
102
  end
91
103
  else
92
104
  if repository_for_indexing.bare?
93
105
  recurse_blobs_index(repository_for_indexing.lookup(target_sha).tree, target_sha)
94
106
  else
95
- repository_for_indexing.index.each do |blob|
107
+ repository_for_indexing.index.each_with_index do |blob, step|
96
108
  b = LiteBlob.new(repository_for_indexing, blob)
97
109
  index_blob(b, target_sha)
110
+
111
+ # Run GC every 100 blobs
112
+ ObjectSpace.garbage_collect if step % 100 == 0
98
113
  end
99
114
  end
100
115
  end
@@ -108,29 +123,51 @@ module Elasticsearch
108
123
  index_blob(b, target_sha)
109
124
  end
110
125
 
126
+ # Run GC on every recursion step
127
+ ObjectSpace.garbage_collect
128
+
111
129
  tree.each_tree do |nested_tree|
112
130
  recurse_blobs_index(repository_for_indexing.lookup(nested_tree[:oid]), target_sha, "#{path}#{nested_tree[:name]}/")
113
131
  end
114
132
  end
115
133
 
116
134
  def index_blob(blob, target_sha)
117
- if blob.text?
118
- client_for_indexing.index \
119
- index: "#{self.class.index_name}",
120
- type: "repository",
121
- id: "#{repository_id}_#{blob.path}",
122
- body: {
123
- blob: {
124
- type: "blob",
125
- oid: blob.id,
126
- rid: repository_id,
127
- content: blob.data,
128
- commit_sha: target_sha
135
+ if can_index_blob?(blob)
136
+ tries = 0
137
+ begin
138
+ client_for_indexing.index \
139
+ index: "#{self.class.index_name}",
140
+ type: "repository",
141
+ id: "#{repository_id}_#{blob.path}",
142
+ body: {
143
+ blob: {
144
+ type: "blob",
145
+ oid: blob.id,
146
+ rid: repository_id,
147
+ content: blob.data,
148
+ commit_sha: target_sha,
149
+ path: blob.path,
150
+ language: blob.language ? blob.language.name : "Text"
151
+ }
129
152
  }
130
- }
153
+ rescue Exception => ex
154
+ # Retry sending the request up to 10 times
155
+ if tries < 10
156
+ tries += 1
157
+ sleep tries * 10 * rand(10)
158
+ retry
159
+ else
160
+ logger.warn "Can't index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
161
+ end
162
+ end
131
163
  end
132
164
  end
133
165
 
166
+ # Index only text-like files smaller than 1 MB (1,048,576 bytes)
167
+ def can_index_blob?(blob)
168
+ blob.text? && (blob.size && blob.size.to_i < 1048576)
169
+ end
170
+
134
171
  def delete_from_index_blob(blob)
135
172
  if blob.text?
136
173
  begin
@@ -140,6 +177,8 @@ module Elasticsearch
140
177
  id: "#{repository_id}_#{blob.path}"
141
178
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
142
179
  return true
180
+ rescue Exception => ex
181
+ logger.warn "Error with remove file from index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
143
182
  end
144
183
  end
145
184
  end
@@ -167,52 +206,74 @@ module Elasticsearch
167
206
  #
168
207
  # For search from commits use type 'commit'
169
208
  def index_commits(from_rev: nil, to_rev: nil)
170
- if to_rev.present?
209
+ to_rev = repository_for_indexing.head.target unless to_rev.present?
210
+
211
+ if to_rev != "0000000000000000000000000000000000000000"
212
+ # Validate that to_rev refers to a commit
213
+ begin
214
+ raise unless repository_for_indexing.lookup(to_rev).type == :commit
215
+ rescue
216
+ raise ArgumentError, "'to_rev': '#{to_rev}' is a incorrect commit sha."
217
+ end
218
+
171
219
  begin
172
220
  if from_rev.present? && from_rev != "0000000000000000000000000000000000000000"
173
221
  raise unless repository_for_indexing.lookup(from_rev).type == :commit
174
222
  end
175
- if to_rev != "0000000000000000000000000000000000000000"
176
- raise unless repository_for_indexing.lookup(to_rev).type == :commit
177
- end
178
223
  rescue
179
224
  raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha."
180
225
  end
181
226
 
182
- walker = if from_rev == "0000000000000000000000000000000000000000" || from_rev.nil?
183
- repository_for_indexing.walk(to_rev)
184
- else
185
- repository_for_indexing.walk(from_rev, to_rev)
186
- end
227
+ # If a new branch was pushed, there is no need to reindex the whole repository:
228
+ # find the merge base with HEAD and index only the commits after it
229
+ if from_rev == "0000000000000000000000000000000000000000" && to_rev != repository_for_indexing.head.target
230
+ from_rev = repository_for_indexing.merge_base(to_rev, repository_for_indexing.head.target)
231
+ end
187
232
 
188
- walker.each do |commit|
189
- index_commit(commit)
233
+ walker = Rugged::Walker.new(repository_for_indexing)
234
+ walker.push(to_rev)
235
+
236
+ if from_rev.present? && from_rev != "0000000000000000000000000000000000000000"
237
+ walker.hide(from_rev)
190
238
  end
191
- else
192
- repository_for_indexing.each_id do |oid|
193
- obj = repository_for_indexing.lookup(oid)
194
- if obj.type == :commit
195
- index_commit(obj)
196
- end
239
+
240
+ commits = walker.map { |c| c.oid }
241
+ walker.reset
242
+
243
+ commits.each_with_index do |commit, step|
244
+ index_commit(repository_for_indexing.lookup(commit))
245
+ ObjectSpace.garbage_collect if step % 100 == 0
197
246
  end
198
247
  end
199
248
  end
200
249
 
201
250
  def index_commit(commit)
202
- client_for_indexing.index \
203
- index: "#{self.class.index_name}",
204
- type: "repository",
205
- id: "#{repository_id}_#{commit.oid}",
206
- body: {
207
- commit: {
208
- type: "commit",
209
- rid: repository_id,
210
- sha: commit.oid,
211
- author: commit.author,
212
- committer: commit.committer,
213
- message: commit.message
251
+ tries = 0
252
+ begin
253
+ client_for_indexing.index \
254
+ index: "#{self.class.index_name}",
255
+ type: "repository",
256
+ id: "#{repository_id}_#{commit.oid}",
257
+ body: {
258
+ commit: {
259
+ type: "commit",
260
+ rid: repository_id,
261
+ sha: commit.oid,
262
+ author: commit.author,
263
+ committer: commit.committer,
264
+ message: encode!(commit.message)
265
+ }
214
266
  }
215
- }
267
+ rescue Exception => ex
268
+ # Retry sending the request up to 10 times
269
+ if tries < 10
270
+ tries += 1
271
+ sleep tries * 10 * rand(10)
272
+ retry
273
+ else
274
+ logger.warn "Can't index #{repository_id}_#{commit.oid}. Reason: #{ex.message}"
275
+ end
276
+ end
216
277
  end
217
278
 
218
279
  # Representation of repository as indexed json
@@ -235,7 +296,7 @@ module Elasticsearch
235
296
  result.push(recurse_blobs_index_hash(tree))
236
297
  else
237
298
  repository_for_indexing.index.each do |blob|
238
- b = EasyBlob.new(repository_for_indexing, blob)
299
+ b = LiteBlob.new(repository_for_indexing, blob)
239
300
  result.push(
240
301
  {
241
302
  type: 'blob',
@@ -290,7 +351,7 @@ module Elasticsearch
290
351
  sha: obj.oid,
291
352
  author: obj.author,
292
353
  committer: obj.committer,
293
- message: obj.message
354
+ message: encode!(obj.message)
294
355
  }
295
356
  )
296
357
  end
@@ -326,18 +387,18 @@ module Elasticsearch
326
387
  end
327
388
 
328
389
  def repository_for_indexing(repo_path = "")
390
+ return @rugged_repo_indexer if defined? @rugged_repo_indexer
391
+
329
392
  @path_to_repo ||= repo_path
330
393
  set_repository_id
331
- Rugged::Repository.new(@path_to_repo)
394
+ @rugged_repo_indexer = Rugged::Repository.new(@path_to_repo)
332
395
  end
333
396
 
334
397
  def client_for_indexing
335
398
  @client_for_indexing ||= Elasticsearch::Client.new log: true
336
399
  end
337
- end
338
400
 
339
- module ClassMethods
340
- def search(query, type: :all, page: 1, per: 20, options: {})
401
+ def self.search(query, type: :all, page: 1, per: 20, options: {})
341
402
  results = { blobs: [], commits: []}
342
403
  case type.to_sym
343
404
  when :all
@@ -352,6 +413,12 @@ module Elasticsearch
352
413
  results
353
414
  end
354
415
 
416
+ def logger
417
+ @logger ||= Logger.new(STDOUT)
418
+ end
419
+ end
420
+
421
+ module ClassMethods
355
422
  def search_commit(query, page: 1, per: 20, options: {})
356
423
  page ||= 1
357
424
 
@@ -364,11 +431,19 @@ module Elasticsearch
364
431
  multi_match: {
365
432
  fields: fields,
366
433
  query: "#{query}",
367
- operator: :and
434
+ operator: :or
368
435
  }
369
436
  },
370
437
  },
371
438
  },
439
+ facets: {
440
+ commitRepositoryFaset: {
441
+ terms: {
442
+ field: "commit.rid",
443
+ all_term: true
444
+ }
445
+ }
446
+ },
372
447
  size: per,
373
448
  from: per * (page - 1)
374
449
  }
@@ -387,11 +462,28 @@ module Elasticsearch
387
462
  }
388
463
  end
389
464
 
465
+ options[:order] = :default if options[:order].blank?
466
+ order = case options[:order].to_sym
467
+ when :recently_indexed
468
+ { _timestamp: { order: :desc, mode: :min } }
469
+ when :last_indexed
470
+ { _timestamp: { order: :asc, mode: :min } }
471
+ else
472
+ {}
473
+ end
474
+
475
+ query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
476
+
390
477
  if options[:highlight]
391
- query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
478
+ #query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
392
479
  end
393
480
 
394
- self.__elasticsearch__.search(query_hash).results
481
+ res = self.__elasticsearch__.search(query_hash)
482
+ {
483
+ results: res.results,
484
+ total_count: res.total_count,
485
+ repositories: res.response["facets"]["commitRepositoryFaset"]["terms"]
486
+ }
395
487
  end
396
488
 
397
489
  def search_blob(query, type: :all, page: 1, per: 20, options: {})
@@ -404,12 +496,26 @@ module Elasticsearch
404
496
  match: {
405
497
  'blob.content' => {
406
498
  query: "#{query}",
407
- operator: :and
499
+ operator: :or
408
500
  }
409
501
  }
410
502
  }
411
503
  }
412
504
  },
505
+ facets: {
506
+ languageFacet: {
507
+ terms: {
508
+ field: :language,
509
+ all_term: true
510
+ }
511
+ },
512
+ blobRepositoryFaset: {
513
+ terms: {
514
+ field: :rid,
515
+ all_term: true
516
+ }
517
+ }
518
+ },
413
519
  size: per,
414
520
  from: per * (page - 1)
415
521
  }
@@ -423,56 +529,48 @@ module Elasticsearch
423
529
  }
424
530
  end
425
531
 
426
- if options[:highlight]
427
- query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
532
+ if options[:language]
533
+ query_hash[:query][:filtered][:filter] ||= { and: [] }
534
+ query_hash[:query][:filtered][:filter][:and] << {
535
+ terms: {
536
+ "blob.language" => [options[:language]].flatten
537
+ }
538
+ }
428
539
  end
429
540
 
430
- self.__elasticsearch__.search(query_hash).results
431
- end
432
- end
433
- end
434
-
435
- class LiteBlob
436
- include Linguist::BlobHelper
541
+ options[:order] = :default if options[:order].blank?
542
+ order = case options[:order].to_sym
543
+ when :recently_indexed
544
+ { _timestamp: { order: :desc, mode: :min } }
545
+ when :last_indexed
546
+ { _timestamp: { order: :asc, mode: :min } }
547
+ else
548
+ {}
549
+ end
437
550
 
438
- attr_accessor :id, :name, :path, :data, :commit_id
551
+ query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
439
552
 
440
- def initialize(repo, raw_blob_hash)
441
- @id = raw_blob_hash[:oid]
442
- @path = raw_blob_hash[:path]
443
- @name = @path.split("/").last
444
- @data = encode!(repo.lookup(@id).content)
445
- end
446
-
447
- def encode!(message)
448
- return nil unless message.respond_to? :force_encoding
449
-
450
- # if message is utf-8 encoding, just return it
451
- message.force_encoding("UTF-8")
452
- return message if message.valid_encoding?
553
+ if options[:highlight]
554
+ query_hash[:highlight] = {
555
+ pre_tags: [""],
556
+ post_tags: [""],
557
+ fields: {
558
+ "blob.content" => {},
559
+ "type" => "fvh",
560
+ "boundary_chars" => "\n"
561
+ }
562
+ }
563
+ end
453
564
 
454
- # return message if message type is binary
455
- detect = CharlockHolmes::EncodingDetector.detect(message)
456
- return message.force_encoding("BINARY") if detect && detect[:type] == :binary
565
+ res = self.__elasticsearch__.search(query_hash)
457
566
 
458
- # encoding message to detect encoding
459
- if detect && detect[:encoding]
460
- message.force_encoding(detect[:encoding])
567
+ {
568
+ results: res.results,
569
+ total_count: res.total_count,
570
+ languages: res.response["facets"]["languageFacet"]["terms"],
571
+ repositories: res.response["facets"]["blobRepositoryFaset"]["terms"]
572
+ }
461
573
  end
462
-
463
- # encode and clean the bad chars
464
- message.replace clean(message)
465
- rescue
466
- encoding = detect ? detect[:encoding] : "unknown"
467
- "--broken encoding: #{encoding}"
468
- end
469
-
470
- private
471
-
472
- def clean(message)
473
- message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
474
- .encode("UTF-8")
475
- .gsub("\0".encode("UTF-8"), "")
476
574
  end
477
575
  end
478
576
  end
@@ -1,5 +1,5 @@
1
1
  module Elasticsearch
2
2
  module Git
3
- VERSION = "0.0.3"
3
+ VERSION = "0.0.4"
4
4
  end
5
5
  end
File without changes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch-git
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrey Kumanyaev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-19 00:00:00.000000000 Z
11
+ date: 2014-04-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch-model
@@ -28,87 +28,87 @@ dependencies:
28
28
  name: elasticsearch-api
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - '>'
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: 0.4.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - '>'
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: 0.4.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rugged
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: 0.19.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: 0.19.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: charlock_holmes
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: 0.6.9
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ~>
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: 0.6.9
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: gitlab-linguist
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - '>'
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: 2.9.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - '>'
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: 2.9.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: activemodel
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - '>='
87
+ - - ~>
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: 4.0.0
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - '>='
94
+ - - ~>
95
95
  - !ruby/object:Gem::Version
96
- version: '0'
96
+ version: 4.0.0
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: activesupport
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - '>='
101
+ - - ~>
102
102
  - !ruby/object:Gem::Version
103
- version: '0'
103
+ version: 4.0.0
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - '>='
108
+ - - ~>
109
109
  - !ruby/object:Gem::Version
110
- version: '0'
111
- description: Elasticsearch integrations for git repositories.
110
+ version: 4.0.0
111
+ description: Elasticsearch integrations for indexing git repositories.
112
112
  email:
113
113
  - me@zzet.org
114
114
  executables: []
@@ -122,10 +122,12 @@ files:
122
122
  - Rakefile
123
123
  - elasticsearch-git.gemspec
124
124
  - lib/elasticsearch/git.rb
125
+ - lib/elasticsearch/git/encoder_helper.rb
126
+ - lib/elasticsearch/git/lite_blob.rb
125
127
  - lib/elasticsearch/git/model.rb
126
128
  - lib/elasticsearch/git/repository.rb
127
129
  - lib/elasticsearch/git/version.rb
128
- - lib/test/test_helper.rb
130
+ - test/test_helper.rb
129
131
  homepage: https://github.com/zzet/elasticsearch-git
130
132
  licenses:
131
133
  - MIT
@@ -150,4 +152,5 @@ rubygems_version: 2.0.3
150
152
  signing_key:
151
153
  specification_version: 4
152
154
  summary: Elasticsearch integrations for git repositories.
153
- test_files: []
155
+ test_files:
156
+ - test/test_helper.rb