elasticsearch-git 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d6dfec96f6e2fbded64a0429c7b083aadf96234b
4
- data.tar.gz: 8eb678cf43ecbb8aeabc1016e00f3b721010f269
3
+ metadata.gz: d48d7fd7bd1dc6d71ce6c24024a746cb75d0b60e
4
+ data.tar.gz: e4297f9c88cecef626495998bbb3838ee8829f61
5
5
  SHA512:
6
- metadata.gz: b88db3a9d3647bcd9c3eae1039e7b2380e51fcbf4b02b242e2bda5acc2cff8c58b8d2a1ad2941fedb586f3cfd318f9c9b386fa9bcdfeaeabdaef616c3c891f38
7
- data.tar.gz: 5b2fa5f90008f048b33627724847ca317e36e766fdbc4e9e185e8ba26c7edc16a4d3981a5e6577c4943cfad45fe3825694728ca9eef8a3927414ef8466f9ca55
6
+ metadata.gz: 0260393250e6bb3fdb4a757b363b5b48d5bd3851bbb923b53170ca88a8b40b7f6c7319a4f0ff6c4b14e4fa8ede81c70b3b6eb343617c624f8a9e961e366d039e
7
+ data.tar.gz: be0c4572531338cd2cfa595ce523b9bd16d65bc4139942319862f77fb00215bd85c63bdda4261f6da2b0142a373b7fb862a8f9426f47cff7a5b91481f29670ce
data/README.md CHANGED
@@ -316,6 +316,10 @@ Project.last.repository.as_indexed_json
316
316
  :message=>"first commit\n"}]}
317
317
  ```
318
318
 
319
+ ## TODO
320
+
321
+ * Add exception handlers for indexing (connection errors and timeouts)
322
+
319
323
  ## Contributing
320
324
 
321
325
  1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Andrey Kumanyaev"]
10
10
  spec.email = ["me@zzet.org"]
11
11
  spec.summary = %q{Elasticsearch integrations for git repositories.}
12
- spec.description = %q{Elasticsearch integrations for git repositories.}
12
+ spec.description = %q{Elasticsearch integrations for indexing git repositories.}
13
13
  spec.homepage = "https://github.com/zzet/elasticsearch-git"
14
14
  spec.license = "MIT"
15
15
 
@@ -19,10 +19,10 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_runtime_dependency 'elasticsearch-model'
22
- spec.add_runtime_dependency 'elasticsearch-api'
23
- spec.add_runtime_dependency 'rugged'
24
- spec.add_runtime_dependency 'charlock_holmes'
25
- spec.add_runtime_dependency 'gitlab-linguist'
26
- spec.add_runtime_dependency 'activemodel'
27
- spec.add_runtime_dependency 'activesupport'
22
+ spec.add_runtime_dependency 'elasticsearch-api', '> 0.4.0'
23
+ spec.add_runtime_dependency 'rugged', '~> 0.19.0'
24
+ spec.add_runtime_dependency 'charlock_holmes', '~> 0.6.9'
25
+ spec.add_runtime_dependency 'gitlab-linguist', '> 2.9.0'
26
+ spec.add_runtime_dependency 'activemodel', '~> 4.0.0'
27
+ spec.add_runtime_dependency 'activesupport', '~> 4.0.0'
28
28
  end
@@ -0,0 +1,43 @@
1
+ require 'active_support/concern'
2
+ require 'charlock_holmes'
3
+
4
+ module Elasticsearch
5
+ module Git
6
+ module EncoderHelper
7
+ extend ActiveSupport::Concern
8
+
9
+ included do
10
+ def encode!(message)
11
+ return nil unless message.respond_to? :force_encoding
12
+
13
+ # if message is already valid UTF-8, just return it
14
+ message.force_encoding("UTF-8")
15
+ return message if message.valid_encoding?
16
+
17
+ # return message if message type is binary
18
+ detect = CharlockHolmes::EncodingDetector.detect(message)
19
+ return message.force_encoding("BINARY") if detect && detect[:type] == :binary
20
+
21
+ # tag the message with the detected encoding
22
+ if detect && detect[:encoding]
23
+ message.force_encoding(detect[:encoding])
24
+ end
25
+
26
+ # encode and clean the bad chars
27
+ message.replace clean(message)
28
+ rescue
29
+ encoding = detect ? detect[:encoding] : "unknown"
30
+ "--broken encoding: #{encoding}"
31
+ end
32
+
33
+ private
34
+
35
+ def clean(message)
36
+ message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
37
+ .encode("UTF-8")
38
+ .gsub("\0".encode("UTF-8"), "")
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,25 @@
1
+ require 'linguist'
2
+ require 'elasticsearch/git/encoder_helper'
3
+
4
+ module Elasticsearch
5
+ module Git
6
+ class LiteBlob
7
+ include Linguist::BlobHelper
8
+ include Elasticsearch::Git::EncoderHelper
9
+
10
+ attr_accessor :id, :name, :path, :data, :size, :mode, :commit_id
11
+
12
+ def initialize(repo, raw_blob_hash)
13
+ @id = raw_blob_hash[:oid]
14
+
15
+ blob = repo.lookup(@id)
16
+
17
+ @mode = '%06o' % raw_blob_hash[:filemode]
18
+ @size = blob.size
19
+ @path = encode!(raw_blob_hash[:path])
20
+ @name = @path.split('/').last
21
+ @data = encode!(blob.content)
22
+ end
23
+ end
24
+ end
25
+ end
@@ -42,7 +42,7 @@ module Elasticsearch
42
42
  },
43
43
  tokenizer: {
44
44
  sha_tokenizer: {
45
- type: "NGram",
45
+ type: "edgeNGram",
46
46
  min_gram: 8,
47
47
  max_gram: 40,
48
48
  token_chars: %w(letter digit)
@@ -61,7 +61,7 @@ module Elasticsearch
61
61
  max_gram: 20
62
62
  },
63
63
  sha_ngrams: {
64
- type: "NGram",
64
+ type: "edgeNGram",
65
65
  min_gram: 8,
66
66
  max_gram: 40
67
67
  }
@@ -1,9 +1,10 @@
1
1
  require 'active_support/concern'
2
2
  require 'active_model'
3
3
  require 'elasticsearch'
4
- require 'elasticsearch/model'
4
+ require 'elasticsearch/git/model'
5
+ require 'elasticsearch/git/encoder_helper'
6
+ require 'elasticsearch/git/lite_blob'
5
7
  require 'rugged'
6
- require 'linguist'
7
8
 
8
9
  module Elasticsearch
9
10
  module Git
@@ -12,30 +13,37 @@ module Elasticsearch
12
13
 
13
14
  included do
14
15
  include Elasticsearch::Git::Model
16
+ include Elasticsearch::Git::EncoderHelper
15
17
 
16
- mapping do
18
+ mapping _timestamp: { enabled: true } do
17
19
  indexes :blob do
18
20
  indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
19
21
  indexes :rid, type: :string, index: :not_analyzed
20
- indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
21
- indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
22
- indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
22
+ indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
23
+ indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
24
+ indexes :path, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
25
+ indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
26
+ indexes :language, type: :string, index: :not_analyzed
23
27
  end
28
+
24
29
  indexes :commit do
25
30
  indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
26
31
  indexes :rid, type: :string, index: :not_analyzed
27
- indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
32
+ indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
33
+
28
34
  indexes :author do
29
- indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
30
- indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
35
+ indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
36
+ indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
31
37
  indexes :time, type: :date
32
38
  end
39
+
33
40
  indexes :commiter do
34
- indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
35
- indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
41
+ indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
42
+ indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
36
43
  indexes :time, type: :date
37
44
  end
38
- indexes :message, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
45
+
46
+ indexes :message, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :code_analyzer
39
47
  end
40
48
  end
41
49
 
@@ -79,7 +87,8 @@ module Elasticsearch
79
87
  end
80
88
 
81
89
  diff = repository_for_indexing.diff(from_rev, to_rev)
82
- diff.deltas.reverse.each do |delta|
90
+
91
+ diff.deltas.reverse.each_with_index do |delta, step|
83
92
  if delta.status == :deleted
84
93
  b = LiteBlob.new(repository_for_indexing, delta.old_file)
85
94
  delete_from_index_blob(b)
@@ -87,14 +96,20 @@ module Elasticsearch
87
96
  b = LiteBlob.new(repository_for_indexing, delta.new_file)
88
97
  index_blob(b, target_sha)
89
98
  end
99
+
100
+ # Run GC every 100 blobs
101
+ ObjectSpace.garbage_collect if step % 100 == 0
90
102
  end
91
103
  else
92
104
  if repository_for_indexing.bare?
93
105
  recurse_blobs_index(repository_for_indexing.lookup(target_sha).tree, target_sha)
94
106
  else
95
- repository_for_indexing.index.each do |blob|
107
+ repository_for_indexing.index.each_with_index do |blob, step|
96
108
  b = LiteBlob.new(repository_for_indexing, blob)
97
109
  index_blob(b, target_sha)
110
+
111
+ # Run GC every 100 blobs
112
+ ObjectSpace.garbage_collect if step % 100 == 0
98
113
  end
99
114
  end
100
115
  end
@@ -108,29 +123,51 @@ module Elasticsearch
108
123
  index_blob(b, target_sha)
109
124
  end
110
125
 
126
+ # Run GC on every recursion step
127
+ ObjectSpace.garbage_collect
128
+
111
129
  tree.each_tree do |nested_tree|
112
130
  recurse_blobs_index(repository_for_indexing.lookup(nested_tree[:oid]), target_sha, "#{path}#{nested_tree[:name]}/")
113
131
  end
114
132
  end
115
133
 
116
134
  def index_blob(blob, target_sha)
117
- if blob.text?
118
- client_for_indexing.index \
119
- index: "#{self.class.index_name}",
120
- type: "repository",
121
- id: "#{repository_id}_#{blob.path}",
122
- body: {
123
- blob: {
124
- type: "blob",
125
- oid: blob.id,
126
- rid: repository_id,
127
- content: blob.data,
128
- commit_sha: target_sha
135
+ if can_index_blob?(blob)
136
+ tries = 0
137
+ begin
138
+ client_for_indexing.index \
139
+ index: "#{self.class.index_name}",
140
+ type: "repository",
141
+ id: "#{repository_id}_#{blob.path}",
142
+ body: {
143
+ blob: {
144
+ type: "blob",
145
+ oid: blob.id,
146
+ rid: repository_id,
147
+ content: blob.data,
148
+ commit_sha: target_sha,
149
+ path: blob.path,
150
+ language: blob.language ? blob.language.name : "Text"
151
+ }
129
152
  }
130
- }
153
+ rescue Exception => ex
154
+ # Retry sending the request up to 10 times
155
+ if tries < 10
156
+ tries += 1
157
+ sleep tries * 10 * rand(10)
158
+ retry
159
+ else
160
+ logger.warn "Can't index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
161
+ end
162
+ end
131
163
  end
132
164
  end
133
165
 
166
+ # Index only text-like files smaller than 1 MB
167
+ def can_index_blob?(blob)
168
+ blob.text? && (blob.size && blob.size.to_i < 1048576)
169
+ end
170
+
134
171
  def delete_from_index_blob(blob)
135
172
  if blob.text?
136
173
  begin
@@ -140,6 +177,8 @@ module Elasticsearch
140
177
  id: "#{repository_id}_#{blob.path}"
141
178
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
142
179
  return true
180
+ rescue Exception => ex
181
+ logger.warn "Error with remove file from index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
143
182
  end
144
183
  end
145
184
  end
@@ -167,52 +206,74 @@ module Elasticsearch
167
206
  #
168
207
  # For search from commits use type 'commit'
169
208
  def index_commits(from_rev: nil, to_rev: nil)
170
- if to_rev.present?
209
+ to_rev = repository_for_indexing.head.target unless to_rev.present?
210
+
211
+ if to_rev != "0000000000000000000000000000000000000000"
212
+ # If to_rev correct
213
+ begin
214
+ raise unless repository_for_indexing.lookup(to_rev).type == :commit
215
+ rescue
216
+ raise ArgumentError, "'to_rev': '#{to_rev}' is a incorrect commit sha."
217
+ end
218
+
171
219
  begin
172
220
  if from_rev.present? && from_rev != "0000000000000000000000000000000000000000"
173
221
  raise unless repository_for_indexing.lookup(from_rev).type == :commit
174
222
  end
175
- if to_rev != "0000000000000000000000000000000000000000"
176
- raise unless repository_for_indexing.lookup(to_rev).type == :commit
177
- end
178
223
  rescue
179
224
  raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha."
180
225
  end
181
226
 
182
- walker = if from_rev == "0000000000000000000000000000000000000000" || from_rev.nil?
183
- repository_for_indexing.walk(to_rev)
184
- else
185
- repository_for_indexing.walk(from_rev, to_rev)
186
- end
227
+ # If a new branch was pushed, there is no need to reindex the whole repository
228
+ # Find the merge base with HEAD and index only the commits after it
229
+ if from_rev == "0000000000000000000000000000000000000000" && to_rev != repository_for_indexing.head.target
230
+ from_rev = repository_for_indexing.merge_base(to_rev, repository_for_indexing.head.target)
231
+ end
187
232
 
188
- walker.each do |commit|
189
- index_commit(commit)
233
+ walker = Rugged::Walker.new(repository_for_indexing)
234
+ walker.push(to_rev)
235
+
236
+ if from_rev.present? && from_rev != "0000000000000000000000000000000000000000"
237
+ walker.hide(from_rev)
190
238
  end
191
- else
192
- repository_for_indexing.each_id do |oid|
193
- obj = repository_for_indexing.lookup(oid)
194
- if obj.type == :commit
195
- index_commit(obj)
196
- end
239
+
240
+ commits = walker.map { |c| c.oid }
241
+ walker.reset
242
+
243
+ commits.each_with_index do |commit, step|
244
+ index_commit(repository_for_indexing.lookup(commit))
245
+ ObjectSpace.garbage_collect if step % 100 == 0
197
246
  end
198
247
  end
199
248
  end
200
249
 
201
250
  def index_commit(commit)
202
- client_for_indexing.index \
203
- index: "#{self.class.index_name}",
204
- type: "repository",
205
- id: "#{repository_id}_#{commit.oid}",
206
- body: {
207
- commit: {
208
- type: "commit",
209
- rid: repository_id,
210
- sha: commit.oid,
211
- author: commit.author,
212
- committer: commit.committer,
213
- message: commit.message
251
+ tries = 0
252
+ begin
253
+ client_for_indexing.index \
254
+ index: "#{self.class.index_name}",
255
+ type: "repository",
256
+ id: "#{repository_id}_#{commit.oid}",
257
+ body: {
258
+ commit: {
259
+ type: "commit",
260
+ rid: repository_id,
261
+ sha: commit.oid,
262
+ author: commit.author,
263
+ committer: commit.committer,
264
+ message: encode!(commit.message)
265
+ }
214
266
  }
215
- }
267
+ rescue Exception => ex
268
+ # Retry sending the request up to 10 times
269
+ if tries < 10
270
+ tries += 1
271
+ sleep tries * 10 * rand(10)
272
+ retry
273
+ else
274
+ logger.warn "Can't index #{repository_id}_#{commit.oid}. Reason: #{ex.message}"
275
+ end
276
+ end
216
277
  end
217
278
 
218
279
  # Representation of repository as indexed json
@@ -235,7 +296,7 @@ module Elasticsearch
235
296
  result.push(recurse_blobs_index_hash(tree))
236
297
  else
237
298
  repository_for_indexing.index.each do |blob|
238
- b = EasyBlob.new(repository_for_indexing, blob)
299
+ b = LiteBlob.new(repository_for_indexing, blob)
239
300
  result.push(
240
301
  {
241
302
  type: 'blob',
@@ -290,7 +351,7 @@ module Elasticsearch
290
351
  sha: obj.oid,
291
352
  author: obj.author,
292
353
  committer: obj.committer,
293
- message: obj.message
354
+ message: encode!(obj.message)
294
355
  }
295
356
  )
296
357
  end
@@ -326,18 +387,18 @@ module Elasticsearch
326
387
  end
327
388
 
328
389
  def repository_for_indexing(repo_path = "")
390
+ return @rugged_repo_indexer if defined? @rugged_repo_indexer
391
+
329
392
  @path_to_repo ||= repo_path
330
393
  set_repository_id
331
- Rugged::Repository.new(@path_to_repo)
394
+ @rugged_repo_indexer = Rugged::Repository.new(@path_to_repo)
332
395
  end
333
396
 
334
397
  def client_for_indexing
335
398
  @client_for_indexing ||= Elasticsearch::Client.new log: true
336
399
  end
337
- end
338
400
 
339
- module ClassMethods
340
- def search(query, type: :all, page: 1, per: 20, options: {})
401
+ def self.search(query, type: :all, page: 1, per: 20, options: {})
341
402
  results = { blobs: [], commits: []}
342
403
  case type.to_sym
343
404
  when :all
@@ -352,6 +413,12 @@ module Elasticsearch
352
413
  results
353
414
  end
354
415
 
416
+ def logger
417
+ @logger ||= Logger.new(STDOUT)
418
+ end
419
+ end
420
+
421
+ module ClassMethods
355
422
  def search_commit(query, page: 1, per: 20, options: {})
356
423
  page ||= 1
357
424
 
@@ -364,11 +431,19 @@ module Elasticsearch
364
431
  multi_match: {
365
432
  fields: fields,
366
433
  query: "#{query}",
367
- operator: :and
434
+ operator: :or
368
435
  }
369
436
  },
370
437
  },
371
438
  },
439
+ facets: {
440
+ commitRepositoryFaset: {
441
+ terms: {
442
+ field: "commit.rid",
443
+ all_term: true
444
+ }
445
+ }
446
+ },
372
447
  size: per,
373
448
  from: per * (page - 1)
374
449
  }
@@ -387,11 +462,28 @@ module Elasticsearch
387
462
  }
388
463
  end
389
464
 
465
+ options[:order] = :default if options[:order].blank?
466
+ order = case options[:order].to_sym
467
+ when :recently_indexed
468
+ { _timestamp: { order: :desc, mode: :min } }
469
+ when :last_indexed
470
+ { _timestamp: { order: :asc, mode: :min } }
471
+ else
472
+ {}
473
+ end
474
+
475
+ query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
476
+
390
477
  if options[:highlight]
391
- query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
478
+ #query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
392
479
  end
393
480
 
394
- self.__elasticsearch__.search(query_hash).results
481
+ res = self.__elasticsearch__.search(query_hash)
482
+ {
483
+ results: res.results,
484
+ total_count: res.total_count,
485
+ repositories: res.response["facets"]["commitRepositoryFaset"]["terms"]
486
+ }
395
487
  end
396
488
 
397
489
  def search_blob(query, type: :all, page: 1, per: 20, options: {})
@@ -404,12 +496,26 @@ module Elasticsearch
404
496
  match: {
405
497
  'blob.content' => {
406
498
  query: "#{query}",
407
- operator: :and
499
+ operator: :or
408
500
  }
409
501
  }
410
502
  }
411
503
  }
412
504
  },
505
+ facets: {
506
+ languageFacet: {
507
+ terms: {
508
+ field: :language,
509
+ all_term: true
510
+ }
511
+ },
512
+ blobRepositoryFaset: {
513
+ terms: {
514
+ field: :rid,
515
+ all_term: true
516
+ }
517
+ }
518
+ },
413
519
  size: per,
414
520
  from: per * (page - 1)
415
521
  }
@@ -423,56 +529,48 @@ module Elasticsearch
423
529
  }
424
530
  end
425
531
 
426
- if options[:highlight]
427
- query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
532
+ if options[:language]
533
+ query_hash[:query][:filtered][:filter] ||= { and: [] }
534
+ query_hash[:query][:filtered][:filter][:and] << {
535
+ terms: {
536
+ "blob.language" => [options[:language]].flatten
537
+ }
538
+ }
428
539
  end
429
540
 
430
- self.__elasticsearch__.search(query_hash).results
431
- end
432
- end
433
- end
434
-
435
- class LiteBlob
436
- include Linguist::BlobHelper
541
+ options[:order] = :default if options[:order].blank?
542
+ order = case options[:order].to_sym
543
+ when :recently_indexed
544
+ { _timestamp: { order: :desc, mode: :min } }
545
+ when :last_indexed
546
+ { _timestamp: { order: :asc, mode: :min } }
547
+ else
548
+ {}
549
+ end
437
550
 
438
- attr_accessor :id, :name, :path, :data, :commit_id
551
+ query_hash[:sort] = order.blank? ? [:_score] : [order, :_score]
439
552
 
440
- def initialize(repo, raw_blob_hash)
441
- @id = raw_blob_hash[:oid]
442
- @path = raw_blob_hash[:path]
443
- @name = @path.split("/").last
444
- @data = encode!(repo.lookup(@id).content)
445
- end
446
-
447
- def encode!(message)
448
- return nil unless message.respond_to? :force_encoding
449
-
450
- # if message is utf-8 encoding, just return it
451
- message.force_encoding("UTF-8")
452
- return message if message.valid_encoding?
553
+ if options[:highlight]
554
+ query_hash[:highlight] = {
555
+ pre_tags: [""],
556
+ post_tags: [""],
557
+ fields: {
558
+ "blob.content" => {},
559
+ "type" => "fvh",
560
+ "boundary_chars" => "\n"
561
+ }
562
+ }
563
+ end
453
564
 
454
- # return message if message type is binary
455
- detect = CharlockHolmes::EncodingDetector.detect(message)
456
- return message.force_encoding("BINARY") if detect && detect[:type] == :binary
565
+ res = self.__elasticsearch__.search(query_hash)
457
566
 
458
- # encoding message to detect encoding
459
- if detect && detect[:encoding]
460
- message.force_encoding(detect[:encoding])
567
+ {
568
+ results: res.results,
569
+ total_count: res.total_count,
570
+ languages: res.response["facets"]["languageFacet"]["terms"],
571
+ repositories: res.response["facets"]["blobRepositoryFaset"]["terms"]
572
+ }
461
573
  end
462
-
463
- # encode and clean the bad chars
464
- message.replace clean(message)
465
- rescue
466
- encoding = detect ? detect[:encoding] : "unknown"
467
- "--broken encoding: #{encoding}"
468
- end
469
-
470
- private
471
-
472
- def clean(message)
473
- message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
474
- .encode("UTF-8")
475
- .gsub("\0".encode("UTF-8"), "")
476
574
  end
477
575
  end
478
576
  end
@@ -1,5 +1,5 @@
1
1
  module Elasticsearch
2
2
  module Git
3
- VERSION = "0.0.3"
3
+ VERSION = "0.0.4"
4
4
  end
5
5
  end
File without changes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch-git
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrey Kumanyaev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-19 00:00:00.000000000 Z
11
+ date: 2014-04-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch-model
@@ -28,87 +28,87 @@ dependencies:
28
28
  name: elasticsearch-api
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - '>'
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: 0.4.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - '>'
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: 0.4.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rugged
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: 0.19.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: 0.19.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: charlock_holmes
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: 0.6.9
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ~>
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: 0.6.9
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: gitlab-linguist
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - '>'
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: 2.9.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - '>'
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: 2.9.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: activemodel
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - '>='
87
+ - - ~>
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: 4.0.0
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - '>='
94
+ - - ~>
95
95
  - !ruby/object:Gem::Version
96
- version: '0'
96
+ version: 4.0.0
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: activesupport
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - '>='
101
+ - - ~>
102
102
  - !ruby/object:Gem::Version
103
- version: '0'
103
+ version: 4.0.0
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - '>='
108
+ - - ~>
109
109
  - !ruby/object:Gem::Version
110
- version: '0'
111
- description: Elasticsearch integrations for git repositories.
110
+ version: 4.0.0
111
+ description: Elasticsearch integrations for indexing git repositories.
112
112
  email:
113
113
  - me@zzet.org
114
114
  executables: []
@@ -122,10 +122,12 @@ files:
122
122
  - Rakefile
123
123
  - elasticsearch-git.gemspec
124
124
  - lib/elasticsearch/git.rb
125
+ - lib/elasticsearch/git/encoder_helper.rb
126
+ - lib/elasticsearch/git/lite_blob.rb
125
127
  - lib/elasticsearch/git/model.rb
126
128
  - lib/elasticsearch/git/repository.rb
127
129
  - lib/elasticsearch/git/version.rb
128
- - lib/test/test_helper.rb
130
+ - test/test_helper.rb
129
131
  homepage: https://github.com/zzet/elasticsearch-git
130
132
  licenses:
131
133
  - MIT
@@ -150,4 +152,5 @@ rubygems_version: 2.0.3
150
152
  signing_key:
151
153
  specification_version: 4
152
154
  summary: Elasticsearch integrations for git repositories.
153
- test_files: []
155
+ test_files:
156
+ - test/test_helper.rb