elasticsearch-git 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a7d0d063702f3bcc2f71f5a8ad9b55ec0377280
4
- data.tar.gz: 656199334b4345491eddb15fd3f4e3ffd0a0b597
3
+ metadata.gz: 743ab16ca2c38bbe9ab23dc7447c5c41d578f415
4
+ data.tar.gz: 4fba8f5da92706100fa6b01e582cbcd16a83d5ad
5
5
  SHA512:
6
- metadata.gz: bf49c347091bb8866356b74bed98b9652b44474d7e5e5341257f06bf759a0eaf48efdf46ccdafa4211a2a7d3e98d390f33ff3b59900a3f7bc737c1020b25be75
7
- data.tar.gz: 99870c02c0532ed6f72b70ee7e962c778148d7bb794a73f20eebd0cafa74e859baf1fbc1d3443fcb90b87280d06018f40f43d8299651a7413985588b218f2c6e
6
+ metadata.gz: efe576c9de405aa4b22c89cfe21171d6de7ac5e686ed4207e73e085cf83d673b114dced51cc6ecb051b6f38aeba9227bf45619cd8f65b7a228101a703d250599
7
+ data.tar.gz: 2e18154c17713a627d3d4b9519f279348bcdccd0779819e7164f1d01e9c62092dc8c5c408b34782e2f46f6c8778a8c52fa75a3985537fcfb30baa36c5088cd66
data/Gemfile CHANGED
@@ -9,7 +9,8 @@ gem 'rugged', git: 'git://github.com/libgit2/rugged.git', branch: 'development',
9
9
  gem 'bundler'
10
10
  gem 'rake'
11
11
  gem 'pry'
12
- gem 'gitlab_git'
12
+ gem 'charlock_holmes', '~> 0.6.9'
13
+ gem 'gitlab-linguist', '~> 3.0.0'
13
14
  gem 'minitest'
14
15
  gem 'activesupport', '> 4.0.0'
15
16
  gem 'activemodel', '> 4.0.0'
data/README.md CHANGED
@@ -30,9 +30,11 @@ $ gem install elasticsearch-git
30
30
  class Repository
31
31
  include Elasticsearch::Git::Repository
32
32
 
33
- set_repository_id project.id
34
- repository_for_indexing '/path/to/your/repo'
33
+ def repository_id
34
+ project.id
35
+ end
35
36
 
37
+ repository_for_indexing '/path/to/your/repo'
36
38
  end
37
39
 
38
40
  Repository.__elasticsearch__.create_index! force: true
@@ -41,6 +43,9 @@ repo = Repository.new
41
43
  repo.index_commits
42
44
  repo.index_blobs
43
45
 
46
+ repo.index_commits(from_rev: "1802bafa70d3b1678cfa46a482fd396dd8a4bd40", to_rev: "8d4175e9f4a36065b52fa752c1fd3594c82c0f28")
47
+ repo.index_blobs(from_rev: "1802bafa70d3b1678cfa46a482fd396dd8a4bd40", to_rev: "8d4175e9f4a36065b52fa752c1fd3594c82c0f28")
48
+
44
49
  Repository.search("query", type: 'blob')
45
50
  Repository.search("query", type: 'commit')
46
51
 
@@ -48,6 +53,257 @@ Repository.search("query", type: 'commit')
48
53
  Repository.search("query")
49
54
  ```
50
55
 
56
+ ## Integration with Gitlab
57
+
58
+ ``` ruby
59
+ # app/elastic/repositories_search.rb
60
+ module RepositoriesSearch
61
+ extend ActiveSupport::Concern
62
+
63
+ included do
64
+ include Elasticsearch::Git::Repository
65
+
66
+ def repository_id
67
+ project.id
68
+ end
69
+ end
70
+ end
71
+
72
+
73
+ # app/models/repository.rb
74
+ class Repository
75
+ include RepositoriesSearch
76
+ #...
77
+ def project
78
+ @project ||= Project.find_with_namespace(@path_with_namespace)
79
+ end
80
+ #...
81
+ end
82
+
83
+ Project.last.repository.__elasticsearch__.create_index! force: true
84
+ Project.last.repository.index_commits
85
+ Project.last.repository.index_blobs
86
+
87
+ Repository.search("some_query")
88
+ # => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
89
+
90
+ Repository.search("some_query", type: :blob)
91
+ # => {blobs: [{}, {}, {}], commits: []}
92
+
93
+ Repository.search("some_query", type: :commit)
94
+ # => {blobs: [], commits: [{}, {}, {}]}
95
+
96
+ Repository.search("some_query", type: :commit, page: 2, per: 50)
97
+ # => ...
98
+
99
+ Repository.search("some_query", options: { repository_id: Project.last.id })
100
+ # => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
101
+
102
+ Repository.search("some_query", options: { repository_id: current_user.authorized_projects.ids })
103
+ # => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
104
+
105
+ Project.last.repository.search("Copyright")[:blobs].first
106
+ => #<Elasticsearch::Model::Response::Result:0xbb84b3fc
107
+ @result=
108
+ {"_index"=>"repository-index-development",
109
+ "_type"=>"repository",
110
+ "_id"=>"4328_LICENSE.txt",
111
+ "_score"=>0.034848917,
112
+ "_source"=>
113
+ {"blob"=>
114
+ {"type"=>"blob",
115
+ "oid"=>"f99909cd4ecb6f2ad08f8e55aac3a9fcd86a2bd2",
116
+ "rid"=>4328,
117
+ "content"=>
118
+ "Copyright (c) 2014 Andrey Kumanyaev\n\nMIT
119
+ License\n\nPermission is hereby granted, free of charge, to any person
120
+ obtaining\na copy of this software and associated documentation files
121
+ (the\n\"Software\"), to deal in the Software without restriction,
122
+ including\nwithout limitation the rights to use, copy, modify, merge,
123
+ publish,\ndistribute, sublicense, and/or sell copies of the Software,
124
+ and to\npermit persons to whom the Software is furnished to do so,
125
+ subject to\nthe following conditions:\n\nThe above copyright notice and
126
+ this permission notice shall be\nincluded in all copies or substantial
127
+ portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT
128
+ WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
129
+ THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
130
+ AND\nNONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
131
+ BE\nLIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
132
+ ACTION\nOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
133
+ CONNECTION\nWITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
134
+ SOFTWARE.\n",
135
+ "commit_sha"=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"}}}>
136
+ ```
137
+
138
+ ## Examples
139
+
140
+ After integrating this gem into [GitLab](https://github.com/gitlabhq/gitlabhq)
141
+
142
+ ``` ruby
143
+ Repository.search("too")[:commits].first
144
+ => #<Elasticsearch::Model::Response::Result:0xbb50dfdc
145
+ @result=
146
+ {"_index"=>"repository-index-development",
147
+ "_type"=>"repository",
148
+ "_id"=>"4328_1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
149
+ "_score"=>0.15873253,
150
+ "_source"=>
151
+ {"commit"=>
152
+ {"type"=>"commit",
153
+ "rid"=>4328,
154
+ "sha"=>"1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
155
+ "author"=>
156
+ {"name"=>"Andrey Kumanyaev",
157
+ "email"=>"me@zzet.org",
158
+ "time"=>"2014-02-16T02:24:23+04:00"},
159
+ "committer"=>
160
+ {"name"=>"Andrey Kumanyaev",
161
+ "email"=>"me@zzet.org",
162
+ "time"=>"2014-02-16T02:24:23+04:00"},
163
+ "message"=>"Save 2. Indexing work. Search too\n"}}}>
164
+
165
+
166
+ Project.last.repository.as_indexed_json
167
+ Project Load (1.7ms) SELECT "projects".* FROM "projects" ORDER BY "projects"."id" DESC LIMIT 1
168
+ Namespace Load (4.8ms) SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = $1 ORDER BY "namespaces"."id" ASC LIMIT 1 [["id", 3739]]
169
+ Namespace Load (0.9ms) SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."path" = 'zzet' LIMIT 1
170
+ Project Load (0.7ms) SELECT "projects".* FROM "projects" WHERE "projects"."namespace_id" = 3739 AND "projects"."path" = 'elasticsearch-git' LIMIT 1
171
+ # Long lines are truncated manually
172
+ => {:blobs=>
173
+ [[{:type=>"blob",
174
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_.gitignore",
175
+ :rid=>4328,
176
+ :oid=>"d87d4be66f458acd52878902bbf1391732ad21e1",
177
+ :content=>
178
+ "*.gem\n*.rbc\n.bundle\n.config\n.yardoc\nGemfile.lock\nInstalledFiles\n_yardoc\ncoverage\ndoc/\nlib/bundler/man\npkg\nrdoc\nspec/reports\ntest/tmp\ntest/version_tmp\ntmp\n",
179
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
180
+ {:type=>"blob",
181
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_Gemfile",
182
+ :rid=>4328,
183
+ :oid=>"7322405f8f3ee5de24f7a727940ac52543e8954c",
184
+ :content=>
185
+ "source 'https://rubygems.org'\n\n# Specify your gem's dependencies in elasticsearch-git.gemspec\ngemspec\n\ngem 'elasticsearch-model', github: 'elasticsearch/elasticsearch-rails'\ngem 'elasticsearc....."
186
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
187
+ {:type=>"blob",
188
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_LICENSE.txt",
189
+ :rid=>4328,
190
+ :oid=>"f99909cd4ecb6f2ad08f8e55aac3a9fcd86a2bd2",
191
+ :content=>
192
+ "Copyright (c) 2014 Andrey Kumanyaev\n\nMIT License\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n\"Softw...."
193
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
194
+ {:type=>"blob",
195
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_README.md",
196
+ :rid=>4328,
197
+ :oid=>"8258d574dfc8040a5d003f06c6493e0033527f36",
198
+ :content=>
199
+ "# Elasticsearch::Git\n\nAttention: Pre-pre-pre beta code. Not production.\n\n[Elasticsearch](https://github.com/elasticsearch/elasticsearch-rails/tree/master/elasticsearch-model) integrations for g...."
200
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
201
+ {:type=>"blob",
202
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_Rakefile",
203
+ :rid=>4328,
204
+ :oid=>"29955274e0d42e164337c411ad9144e8ffd7e46e",
205
+ :content=>"require \"bundler/gem_tasks\"\n",
206
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
207
+ {:type=>"blob",
208
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_elasticsearch-git.gemspec",
209
+ :rid=>4328,
210
+ :oid=>"67762437568dda1bb98ec5eca8be7e4a5c8115a9",
211
+ :content=>
212
+ "# coding: utf-8\nlib = File.expand_path('../lib', __FILE__)\n$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)\nrequire 'elasticsearch/git/version'\n\nGem::Specification.new do |spec|\n spec..."
213
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
214
+ {:type=>"blob",
215
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_elasticsearch/git.rb",
216
+ :rid=>4328,
217
+ :oid=>"d3817ec58af1f44dfd18856bf54ef2bf607901a8",
218
+ :content=>
219
+ "require \"elasticsearch/git/version\"\nrequire \"elasticsearch/git/model\"\nrequire \"elasticsearch/git/commit\"\n\nmodule Elasticsearch\n module Git\n class Test\n include Elasticsearch::..."
220
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
221
+ {:type=>"blob",
222
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/model.rb",
223
+ :rid=>4328,
224
+ :oid=>"3dfbae747f25391779fbe012fe8cc4f38cc4651c",
225
+ :content=>
226
+ "require 'active_support/concern'\nrequire 'active_model'\nrequire 'elasticsearch/model'\n\nmodule Elasticsearch\n module Git\n module Model\n extend ActiveSupport::Concern\n\n include..."
227
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
228
+ {:type=>"blob",
229
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/repository.rb",
230
+ :rid=>4328,
231
+ :oid=>"70fe59c8391f6c27adb79c3e45824e6b4cf9566c",
232
+ :content=>
233
+ "require 'active_support/concern'\nrequire 'active_model'\nrequire 'elasticsearch'\nrequire 'elasticsearch/model'\nrequire 'rugged'\nrequire 'gitlab_git'\n\nmodule Elasticsearch\n module Git\n m..."
234
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
235
+ {:type=>"blob",
236
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/version.rb",
237
+ :rid=>4328,
238
+ :oid=>"79e8082b122492464732f1fb43e9f2bdc96ea146",
239
+ :content=>
240
+ "module Elasticsearch\n module Git\n VERSION = \"0.0.1\"\n end\nend\n",
241
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
242
+ {:type=>"blob",
243
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_test/test_helper.rb",
244
+ :rid=>4328,
245
+ :oid=>"6acc0d2b7bf0f286557d3757c1140b41ab57e8f7",
246
+ :content=>
247
+ "require \"rubygems\"\nrequire 'bundler/setup'\nrequire 'pry'\n\nBundler.require\n\nrequire 'wrong/adapters/minitest'\n\nPROJECT_ROOT = File.join(Dir.pwd)\n\nWrong.config.color\n\nMinitest.autorun\n..."
248
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"}]],
249
+ :commits=>
250
+ [{:type=>"commit",
251
+ :sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28",
252
+ :author=>
253
+ {:name=>"Andrey Kumanyaev",
254
+ :email=>"me@zzet.org",
255
+ :time=>2014-02-16 13:50:32 +0400},
256
+ :committer=>
257
+ {:name=>"Andrey Kumanyaev",
258
+ :email=>"me@zzet.org",
259
+ :time=>2014-02-16 13:50:32 +0400},
260
+ :message=>"Improve readme\n"},
261
+ {:type=>"commit",
262
+ :sha=>"37f1b0710eb7f41254ae0c33db09794a25bbb246",
263
+ :author=>
264
+ {:name=>"Andrey Kumanyaev",
265
+ :email=>"me@zzet.org",
266
+ :time=>2014-02-16 13:49:25 +0400},
267
+ :committer=>
268
+ {:name=>"Andrey Kumanyaev",
269
+ :email=>"me@zzet.org",
270
+ :time=>2014-02-16 13:49:25 +0400},
271
+ :message=>"prepare first test release\n"},
272
+ {:type=>"commit",
273
+ :sha=>"1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
274
+ :author=>
275
+ {:name=>"Andrey Kumanyaev",
276
+ :email=>"me@zzet.org",
277
+ :time=>2014-02-16 02:24:23 +0400},
278
+ :committer=>
279
+ {:name=>"Andrey Kumanyaev",
280
+ :email=>"me@zzet.org",
281
+ :time=>2014-02-16 02:24:23 +0400},
282
+ :message=>"Save 2. Indexing work. Search too\n"},
283
+ {:type=>"commit",
284
+ :sha=>"3ed383bfbf6cba611d191dbc3590779c0444b7f0",
285
+ :author=>
286
+ {:name=>"Andrey Kumanyaev",
287
+ :email=>"me@zzet.org",
288
+ :time=>2014-02-16 00:23:10 +0400},
289
+ :committer=>
290
+ {:name=>"Andrey Kumanyaev",
291
+ :email=>"me@zzet.org",
292
+ :time=>2014-02-16 00:23:10 +0400},
293
+ :message=>"Save commit\n"},
294
+ {:type=>"commit",
295
+ :sha=>"7021addf520a19bdeceef29947c8687965c132ff",
296
+ :author=>
297
+ {:name=>"Andrey Kumanyaev",
298
+ :email=>"me@zzet.org",
299
+ :time=>2014-02-15 14:28:43 +0400},
300
+ :committer=>
301
+ {:name=>"Andrey Kumanyaev",
302
+ :email=>"me@zzet.org",
303
+ :time=>2014-02-15 14:28:43 +0400},
304
+ :message=>"first commit\n"}]}
305
+ ```
306
+
51
307
  ## Contributing
52
308
 
53
309
  1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
@@ -21,7 +21,8 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency 'elasticsearch-model'
22
22
  spec.add_runtime_dependency 'elasticsearch-api'
23
23
  spec.add_runtime_dependency 'rugged'
24
- spec.add_runtime_dependency 'gitlab_git'
24
+ spec.add_runtime_dependency 'charlock_holmes'
25
+ spec.add_runtime_dependency 'gitlab-linguist'
25
26
  spec.add_runtime_dependency 'activemodel'
26
27
  spec.add_runtime_dependency 'activesupport'
27
28
  end
@@ -1,12 +1,9 @@
1
1
  require "elasticsearch/git/version"
2
2
  require "elasticsearch/git/model"
3
- require "elasticsearch/git/commit"
3
+ require "elasticsearch/git/repository"
4
4
 
5
5
  module Elasticsearch
6
6
  module Git
7
- class Test
8
- include Elasticsearch::Git::Model
9
- end
10
7
  end
11
8
  end
12
9
 
@@ -12,7 +12,13 @@ module Elasticsearch
12
12
  include ActiveModel::Model
13
13
  include Elasticsearch::Model
14
14
 
15
- index_name [self.name.downcase, 'index', Rails.env.to_s].join('-')
15
+ env = if defined?(::Rails)
16
+ ::Rails.env.to_s
17
+ else
18
+ "undefined"
19
+ end
20
+
21
+ index_name [self.name.downcase, 'index', env].join('-')
16
22
 
17
23
  settings \
18
24
  index: {
@@ -3,7 +3,7 @@ require 'active_model'
3
3
  require 'elasticsearch'
4
4
  require 'elasticsearch/model'
5
5
  require 'rugged'
6
- require 'gitlab_git'
6
+ require 'linguist'
7
7
 
8
8
  module Elasticsearch
9
9
  module Git
@@ -14,14 +14,16 @@ module Elasticsearch
14
14
  include Elasticsearch::Git::Model
15
15
 
16
16
  mapping do
17
- indexes :blobs do
17
+ indexes :blob do
18
18
  indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
19
+ indexes :rid, type: :string, index: :not_analyzed
19
20
  indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
20
21
  indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
21
22
  indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
22
23
  end
23
- indexes :commits do
24
+ indexes :commit do
24
25
  indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
26
+ indexes :rid, type: :string, index: :not_analyzed
25
27
  indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
26
28
  indexes :author do
27
29
  indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
@@ -51,23 +53,89 @@ module Elasticsearch
51
53
  # }
52
54
  #
53
55
  # For search from blobs use type 'blob'
54
- def index_blobs
55
- target_sha = repository_for_indexing.head.target
56
- repository_for_indexing.index.each do |blob|
56
# Indexes repository blobs into Elasticsearch.
#
# Without +from_rev+ every blob is indexed: a bare repository is walked
# tree by tree starting at the target commit, otherwise the entries of the
# Git index are used. With +from_rev+ only the paths reported by the diff
# between +from_rev+ and +to_rev+ are processed: deleted paths are removed
# from the search index, all other deltas are (re)indexed.
#
# @param from_rev [String, nil] commit sha the diff starts from
# @param to_rev [String, nil] commit sha to index up to; defaults to the
#   target of HEAD
# @raise [ArgumentError] if a given revision cannot be looked up or is not
#   a commit
def index_blobs(from_rev: nil, to_rev: nil)
  if to_rev.blank?
    to_rev = repository_for_indexing.head.target
  else
    to_rev_is_commit =
      begin
        repository_for_indexing.lookup(to_rev).type == :commit
      rescue
        false
      end
    raise ArgumentError, "'to_rev': '#{to_rev}' is a incorrect commit sha." unless to_rev_is_commit
  end

  target_sha = to_rev

  if from_rev.blank?
    # Full indexing: a bare repository has no Git index, so walk its tree.
    if repository_for_indexing.bare?
      root_tree = repository_for_indexing.lookup(target_sha).tree
      recurse_blobs_index(root_tree, target_sha)
    else
      repository_for_indexing.index.each do |entry|
        index_blob(LiteBlob.new(repository_for_indexing, entry), target_sha)
      end
    end
  else
    from_rev_is_commit =
      begin
        repository_for_indexing.lookup(from_rev).type == :commit
      rescue
        false
      end
    raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha." unless from_rev_is_commit

    # Incremental indexing: only touch what changed between the revisions.
    changes = repository_for_indexing.diff(from_rev, to_rev)
    changes.deltas.reverse.each do |delta|
      if delta.status == :deleted
        removed = LiteBlob.new(repository_for_indexing, delta.old_file)
        delete_from_index_blob(removed)
      else
        changed = LiteBlob.new(repository_for_indexing, delta.new_file)
        index_blob(changed, target_sha)
      end
    end
  end
end
98
+
99
+ # Indexing bare repository via walking through tree
100
+ def recurse_blobs_index(tree, target_sha, path = "")
101
+ tree.each_blob do |blob|
102
+ blob[:path] = path + blob[:name]
57
103
  b = LiteBlob.new(repository_for_indexing, blob)
58
- if b.text?
59
- client_for_indexing.index \
60
- index: "#{self.class.index_name}",
61
- type: "blob",
62
- id: "#{repository_id}_#{b.path}",
63
- body: {
64
- blob: {
65
- oid: b.id,
66
- rid: repository_id,
67
- content: b.data,
68
- commit_sha: target_sha
69
- }
104
+ index_blob(b, target_sha)
105
+ end
106
+
107
+ tree.each_tree do |nested_tree|
108
+ recurse_blobs_index(repository_for_indexing.lookup(nested_tree[:oid]), target_sha, "#{path}#{nested_tree[:name]}/")
109
+ end
110
+ end
111
+
112
# Sends a single blob to Elasticsearch as a document of type "repository".
# The document id is "<repository_id>_<blob path>". Blobs for which
# +blob.text?+ is false are skipped.
#
# @param blob [LiteBlob] blob to index
# @param target_sha [String] commit sha stored alongside the blob
def index_blob(blob, target_sha)
  return unless blob.text?

  document = {
    blob: {
      type: "blob",
      oid: blob.id,
      rid: repository_id,
      content: blob.data,
      commit_sha: target_sha
    }
  }

  client_for_indexing.index(
    index: "#{self.class.index_name}",
    type: "repository",
    id: "#{repository_id}_#{blob.path}",
    body: document
  )
end
129
+
130
# Removes the document of a blob from the search index. Blobs for which
# +blob.text?+ is false are skipped, mirroring index_blob.
#
# @param blob [LiteBlob] blob whose document should be removed
# @return [true] when Elasticsearch reports the document as not found;
#   otherwise the client's response (nil for skipped blobs)
def delete_from_index_blob(blob)
  return unless blob.text?

  begin
    client_for_indexing.delete(
      index: "#{self.class.index_name}",
      type: "repository",
      id: "#{repository_id}_#{blob.path}"
    )
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    # The document is already gone, which is the state we wanted.
    true
  end
end
@@ -94,27 +162,45 @@ module Elasticsearch
94
162
  # }
95
163
  #
96
164
  # For search from commits use type 'commit'
97
- def index_commits
98
- repository_for_indexing.each_id do |oid|
99
- obj = repository_for_indexing.lookup(oid)
100
- if obj.type == :commit
101
- client_for_indexing.index \
102
- index: "#{self.class.index_name}",
103
- type: "commit",
104
- id: "#{repository_id}_#{obj.oid}",
105
- body: {
106
- commit: {
107
- rid: repository_id,
108
- sha: obj.oid,
109
- author: obj.author,
110
- committer: obj.committer,
111
- message: obj.message
112
- }
113
- }
165
# Indexes commits into Elasticsearch.
#
# When both +from_rev+ and +to_rev+ are given, each of them is validated and
# the commits yielded by +repository_for_indexing.walk(from_rev, to_rev)+
# are indexed. Otherwise every object id in the repository is looked up and
# all commit objects are indexed.
#
# @param from_rev [String, nil] commit sha
# @param to_rev [String, nil] commit sha
# @raise [ArgumentError] if either revision cannot be looked up or is not a
#   commit; the message names the offending argument
def index_commits(from_rev: nil, to_rev: nil)
  if from_rev.present? && to_rev.present?
    # Validate BOTH revisions (previously from_rev was checked twice and
    # to_rev never), reporting which one is wrong.
    { 'from_rev' => from_rev, 'to_rev' => to_rev }.each do |arg_name, rev|
      is_commit =
        begin
          repository_for_indexing.lookup(rev).type == :commit
        rescue StandardError
          false
        end

      raise ArgumentError, "'#{arg_name}': '#{rev}' is a incorrect commit sha." unless is_commit
    end

    # NOTE(review): confirm that the installed Rugged treats the second
    # argument of Repository#walk as an end revision and not as a sorting
    # mode; if it is a sorting mode, a Rugged::Walker with push/hide is needed.
    repository_for_indexing.walk(from_rev, to_rev).each do |commit|
      index_commit(commit)
    end
  else
    repository_for_indexing.each_id do |oid|
      obj = repository_for_indexing.lookup(oid)
      index_commit(obj) if obj.type == :commit
    end
  end
end
117
186
 
187
# Sends a single commit to Elasticsearch as a document of type "repository".
# The document id is "<repository_id>_<commit oid>".
#
# @param commit [#oid, #author, #committer, #message] commit to index
def index_commit(commit)
  document = {
    commit: {
      type: "commit",
      rid: repository_id,
      sha: commit.oid,
      author: commit.author,
      committer: commit.committer,
      message: commit.message
    }
  }

  client_for_indexing.index(
    index: "#{self.class.index_name}",
    type: "repository",
    id: "#{repository_id}_#{commit.oid}",
    body: document
  )
end
203
+
118
204
  # Representation of repository as indexed json
119
205
  # Attention: this can be a very large hash
120
206
  def as_indexed_json(options = {})
@@ -129,19 +215,52 @@ module Elasticsearch
129
215
  result = []
130
216
 
131
217
  target_sha = repository_for_indexing.head.target
132
- repository_for_indexing.index.each do |blob|
133
- b = EasyBlob.new(repository_for_indexing, blob)
218
+
219
+ if repository_for_indexing.bare?
220
+ tree = repository_for_indexing.lookup(target_sha).tree
221
+ result.push(recurse_blobs_index_hash(tree))
222
+ else
223
+ repository_for_indexing.index.each do |blob|
224
+ b = EasyBlob.new(repository_for_indexing, blob)
225
+ result.push(
226
+ {
227
+ type: 'blob',
228
+ id: "#{target_sha}_#{b.path}",
229
+ rid: repository_id,
230
+ oid: b.id,
231
+ content: b.data,
232
+ commit_sha: target_sha
233
+ }
234
+ ) if b.text?
235
+ end
236
+ end
237
+
238
+ result
239
+ end
240
+
241
# Builds the "indexed json" representation of every text blob reachable from
# +tree+, descending into nested trees.
#
# @param tree [#each_blob, #each_tree] tree to walk
# @param path [String] directory prefix of +tree+ relative to the repository
#   root, either empty or ending in "/"
# @return [Array<Hash>] flat list of blob hashes (:type, :id, :rid, :oid,
#   :content, :commit_sha)
def recurse_blobs_index_hash(tree, path = "")
  # Loop-invariant: resolve the HEAD target once instead of twice per blob.
  head_sha = repository_for_indexing.head.target
  result = []

  tree.each_blob do |blob|
    blob[:path] = path + blob[:name]
    b = LiteBlob.new(repository_for_indexing, blob)
    next unless b.text?

    result.push(
      type: 'blob',
      id: "#{head_sha}_#{path}#{blob[:name]}",
      rid: repository_id,
      oid: b.id,
      content: b.data,
      commit_sha: head_sha
    )
  end

  tree.each_tree do |nested_tree|
    subtree = repository_for_indexing.lookup(nested_tree[:oid])
    # Keep the accumulated prefix; passing only the subtree name dropped the
    # parent directories for anything nested deeper than one level.
    result.concat(recurse_blobs_index_hash(subtree, "#{path}#{nested_tree[:name]}/"))
  end

  result
end
146
265
 
147
266
  # Lookup all object ids for commit objects
@@ -153,6 +272,7 @@ module Elasticsearch
153
272
  if obj.type == :commit
154
273
  res.push(
155
274
  {
275
+ type: 'commit',
156
276
  sha: obj.oid,
157
277
  author: obj.author,
158
278
  committer: obj.committer,
@@ -165,27 +285,141 @@ module Elasticsearch
165
285
  res
166
286
  end
167
287
 
288
# Searches the index on behalf of this repository by delegating to the
# class-level +search+. Unless the caller supplies +options[:repository_id]+,
# the search is scoped to this repository's id.
#
# @param query [String] search query
# @param type [Symbol, String] forwarded to the class-level search
# @param page [Integer] forwarded to the class-level search
# @param per [Integer] forwarded to the class-level search
# @param options [Hash] extra search options; never modified
# @return whatever the class-level +search+ returns
def search(query, type: :all, page: 1, per: 20, options: {})
  # Build a new hash instead of writing into the caller's one.
  scoped_options =
    if options[:repository_id].nil?
      options.merge(repository_id: repository_id)
    else
      options
    end

  self.class.search(query, type: type, page: page, per: per, options: scoped_options)
end
292
+
168
293
  # Repository id used to tell apart data that comes from different repositories
169
294
  # Update this value if needed
170
- def set_repository_id id
295
+ def set_repository_id id = nil
171
296
  @repository_id = id || path_to_repo
172
297
  end
173
298
 
299
+ # Override this method to provide a custom repository id
300
+ def repository_id
301
+ @repository_id
302
+ end
303
+
304
+ unless defined?(path_to_repo)
305
+ def path_to_repo
306
+ if @path_to_repo.blank?
307
+ raise NotImplementedError, 'Please, define "path_to_repo" method, or set "path_to_repo" via "repository_for_indexing" method'
308
+ else
309
+ @path_to_repo
310
+ end
311
+ end
312
+ end
313
+
174
314
  def repository_for_indexing(repo_path = "")
175
315
  @path_to_repo ||= repo_path
316
+ set_repository_id
176
317
  Rugged::Repository.new(@path_to_repo)
177
318
  end
178
319
 
179
320
  def client_for_indexing
180
321
  @client_for_indexing ||= Elasticsearch::Client.new log: true
181
322
  end
323
+ end
324
+
325
# Class-level search API mixed into models that include the repository
# indexing module.
module ClassMethods
  # Searches blobs and/or commits.
  #
  # @param query [String] search query
  # @param type [Symbol, String] :all, :blob or :commit; any other value
  #   leaves both result lists empty
  # @param page [Integer] 1-based page number
  # @param per [Integer] page size
  # @param options [Hash] :repository_id (single id or array of ids),
  #   :highlight (truthy to request highlighting), :in (fields to highlight)
  # @return [Hash] +{ blobs: [...], commits: [...] }+
  def search(query, type: :all, page: 1, per: 20, options: {})
    results = { blobs: [], commits: [] }

    case type.to_sym
    when :all
      results[:blobs] = search_blob(query, page: page, per: per, options: options)
      results[:commits] = search_commit(query, page: page, per: per, options: options)
    when :blob
      results[:blobs] = search_blob(query, page: page, per: per, options: options)
    when :commit
      results[:commits] = search_commit(query, page: page, per: per, options: options)
    end

    results
  end

  # Searches commits by message, sha, author and committer (boosted in that
  # order). A blank query matches all commits.
  #
  # @param query [String] search query
  # @param page [Integer, nil] 1-based page number (nil is treated as 1)
  # @param per [Integer] page size
  # @param options [Hash] see #search
  # @return the +results+ of the Elasticsearch response
  def search_commit(query, page: 1, per: 20, options: {})
    page ||= 1

    fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map { |i| "commit.#{i}" }

    query_hash = {
      query: {
        filtered: {
          query: {
            multi_match: {
              fields: fields,
              query: "#{query}",
              operator: :and
            }
          }
        }
      },
      size: per,
      from: per * (page - 1)
    }

    if query.blank?
      query_hash[:query][:filtered][:query] = { match_all: {} }
      query_hash[:track_scores] = true
    end

    add_repository_filter(query_hash, "commit.rid", options[:repository_id])
    add_highlight(query_hash, options)

    self.__elasticsearch__.search(query_hash).results
  end

  # Searches blob contents.
  #
  # @param query [String] search query
  # @param type [Symbol] accepted for signature compatibility; not used
  # @param page [Integer, nil] 1-based page number (nil is treated as 1)
  # @param per [Integer] page size
  # @param options [Hash] see #search
  # @return the +results+ of the Elasticsearch response
  def search_blob(query, type: :all, page: 1, per: 20, options: {})
    page ||= 1

    query_hash = {
      query: {
        filtered: {
          query: {
            match: {
              'blob.content' => {
                query: "#{query}",
                operator: :and
              }
            }
          }
        }
      },
      size: per,
      from: per * (page - 1)
    }

    add_repository_filter(query_hash, "blob.rid", options[:repository_id])
    add_highlight(query_hash, options)

    self.__elasticsearch__.search(query_hash).results
  end

  private

  # Restricts the query to the given repository id(s) via a terms filter on
  # +rid_field+. Does nothing when no repository id is given.
  def add_repository_filter(query_hash, rid_field, repository_ids)
    return unless repository_ids

    filtered = query_hash[:query][:filtered]
    filtered[:filter] ||= { and: [] }
    filtered[:filter][:and] << { terms: { rid_field => [repository_ids].flatten } }
  end

  # Adds a highlight section for the fields listed in options[:in] when
  # options[:highlight] is truthy. each_with_object keeps the accumulator
  # (the previous inject returned the assigned {} and always produced an
  # empty field list); Array() tolerates a missing :in.
  def add_highlight(query_hash, options)
    return unless options[:highlight]

    highlight_fields = Array(options[:in]).each_with_object({}) do |field, acc|
      acc[field.to_sym] = {}
    end
    query_hash[:highlight] = { fields: highlight_fields }
  end
end
184
419
  end
185
420
 
186
421
  class LiteBlob
187
422
  include Linguist::BlobHelper
188
- include EncodingHelper
189
423
 
190
424
  attr_accessor :id, :name, :path, :data, :commit_id
191
425
 
@@ -195,6 +429,37 @@ module Elasticsearch
195
429
  @name = @path.split("/").last
196
430
  @data = encode!(repo.lookup(@id).content)
197
431
  end
432
+
433
# Converts +message+ to a UTF-8 string, best effort.
#
# Unfrozen strings are re-tagged/converted in place and returned; frozen
# strings are copied first, because every step below mutates the string and
# would otherwise raise and be reported as a broken encoding.
#
# @param message [String, Object] raw content
# @return [String, nil] nil when +message+ does not respond to
#   +force_encoding+; the string tagged as UTF-8 when its bytes are valid
#   UTF-8; the string tagged as BINARY when the detector reports binary
#   content; otherwise the string transcoded to UTF-8 with bad characters
#   removed. If anything raises, the placeholder
#   "--broken encoding: <detected encoding or unknown>" is returned.
def encode!(message)
  return nil unless message.respond_to? :force_encoding

  # work on a copy when the input cannot be modified in place
  message = message.dup if message.frozen?

  # if message is utf-8 encoding, just return it
  message.force_encoding("UTF-8")
  return message if message.valid_encoding?

  # return message if message type is binary
  detect = CharlockHolmes::EncodingDetector.detect(message)
  return message.force_encoding("BINARY") if detect && detect[:type] == :binary

  # tag the message with the detected encoding
  if detect && detect[:encoding]
    message.force_encoding(detect[:encoding])
  end

  # encode and clean the bad chars
  message.replace clean(message)
rescue
  # deliberate best effort: never let an encoding problem abort indexing
  encoding = detect ? detect[:encoding] : "unknown"
  "--broken encoding: #{encoding}"
end
455
+
456
+ private
457
+
458
+ def clean(message)
459
+ message.encode("UTF-16BE", :undef => :replace, :invalid => :replace, :replace => "")
460
+ .encode("UTF-8")
461
+ .gsub("\0".encode("UTF-8"), "")
462
+ end
198
463
  end
199
464
  end
200
465
  end
@@ -1,5 +1,5 @@
1
1
  module Elasticsearch
2
2
  module Git
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch-git
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrey Kumanyaev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-16 00:00:00.000000000 Z
11
+ date: 2014-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch-model
@@ -53,7 +53,21 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: gitlab_git
56
+ name: charlock_holmes
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: gitlab-linguist
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - '>='