elasticsearch-git 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a7d0d063702f3bcc2f71f5a8ad9b55ec0377280
4
- data.tar.gz: 656199334b4345491eddb15fd3f4e3ffd0a0b597
3
+ metadata.gz: 743ab16ca2c38bbe9ab23dc7447c5c41d578f415
4
+ data.tar.gz: 4fba8f5da92706100fa6b01e582cbcd16a83d5ad
5
5
  SHA512:
6
- metadata.gz: bf49c347091bb8866356b74bed98b9652b44474d7e5e5341257f06bf759a0eaf48efdf46ccdafa4211a2a7d3e98d390f33ff3b59900a3f7bc737c1020b25be75
7
- data.tar.gz: 99870c02c0532ed6f72b70ee7e962c778148d7bb794a73f20eebd0cafa74e859baf1fbc1d3443fcb90b87280d06018f40f43d8299651a7413985588b218f2c6e
6
+ metadata.gz: efe576c9de405aa4b22c89cfe21171d6de7ac5e686ed4207e73e085cf83d673b114dced51cc6ecb051b6f38aeba9227bf45619cd8f65b7a228101a703d250599
7
+ data.tar.gz: 2e18154c17713a627d3d4b9519f279348bcdccd0779819e7164f1d01e9c62092dc8c5c408b34782e2f46f6c8778a8c52fa75a3985537fcfb30baa36c5088cd66
data/Gemfile CHANGED
@@ -9,7 +9,8 @@ gem 'rugged', git: 'git://github.com/libgit2/rugged.git', branch: 'development',
9
9
  gem 'bundler'
10
10
  gem 'rake'
11
11
  gem 'pry'
12
- gem 'gitlab_git'
12
+ gem 'charlock_holmes', '~> 0.6.9'
13
+ gem 'gitlab-linguist', '~> 3.0.0'
13
14
  gem 'minitest'
14
15
  gem 'activesupport', '> 4.0.0'
15
16
  gem 'activemodel', '> 4.0.0'
data/README.md CHANGED
@@ -30,9 +30,11 @@ $ gem install elasticsearch-git
30
30
  class Repository
31
31
  include Elasticsearch::Git::Repository
32
32
 
33
- set_repository_id project.id
34
- repository_for_indexing '/path/to/your/repo'
33
+ def repository_id
34
+ project.id
35
+ end
35
36
 
37
+ repository_for_indexing '/path/to/your/repo'
36
38
  end
37
39
 
38
40
  Repository.__elasticsearch__.create_index! force: true
@@ -41,6 +43,9 @@ repo = Repository.new
41
43
  repo.index_commits
42
44
  repo.index_blobs
43
45
 
46
+ repo.index_commits(from_rev: "1802bafa70d3b1678cfa46a482fd396dd8a4bd40", to_rev: "8d4175e9f4a36065b52fa752c1fd3594c82c0f28")
47
+ repo.index_blobs(from_rev: "1802bafa70d3b1678cfa46a482fd396dd8a4bd40", to_rev: "8d4175e9f4a36065b52fa752c1fd3594c82c0f28")
48
+
44
49
  Repository.search("query", type: 'blob')
45
50
  Repository.search("query", type: 'commit')
46
51
 
@@ -48,6 +53,257 @@ Repository.search("query", type: 'commit')
48
53
  Repository.search("query")
49
54
  ```
50
55
 
56
+ ## Integration with Gitlab
57
+
58
+ ``` ruby
59
+ # app/elastic/repositories_search.rb
60
+ module RepositoriesSearch
61
+ extend ActiveSupport::Concern
62
+
63
+ included do
64
+ include Elasticsearch::Git::Repository
65
+
66
+ def repository_id
67
+ project.id
68
+ end
69
+ end
70
+ end
71
+
72
+
73
+ # app/models/repository.rb
74
+ class Repository
75
+ include RepositoriesSearch
76
+ #...
77
+ def project
78
+ @project ||= Project.find_with_namespace(@path_with_namespace)
79
+ end
80
+ #...
81
+ end
82
+
83
+ Project.last.repository.__elasticsearch__.create_index! force: true
84
+ Project.last.repository.index_commits
85
+ Project.last.repository.index_blobs
86
+
87
+ Repository.search("some_query")
88
+ # => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
89
+
90
+ Repository.search("some_query", type: :blob)
91
+ # => {blobs: [{}, {}, {}], commits: []}
92
+
93
+ Repository.search("some_query", type: :commit)
94
+ # => {blobs: [], commits: [{}, {}, {}]}
95
+
96
+ Repository.search("some_query", type: :commit, page: 2, per: 50)
97
+ # => ...
98
+
99
+ Repository.search("some_query", options: { repository_id: Project.last.id })
100
+ # => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
101
+
102
+ Repository.search("some_query", options: { repository_id: current_user.authorized_projects.ids })
103
+ # => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
104
+
105
+ Project.last.repository.search("Copyright")[:blobs].first
106
+ => #<Elasticsearch::Model::Response::Result:0xbb84b3fc
107
+ @result=
108
+ {"_index"=>"repository-index-development",
109
+ "_type"=>"repository",
110
+ "_id"=>"4328_LICENSE.txt",
111
+ "_score"=>0.034848917,
112
+ "_source"=>
113
+ {"blob"=>
114
+ {"type"=>"blob",
115
+ "oid"=>"f99909cd4ecb6f2ad08f8e55aac3a9fcd86a2bd2",
116
+ "rid"=>4328,
117
+ "content"=>
118
+ "Copyright (c) 2014 Andrey Kumanyaev\n\nMIT
119
+ License\n\nPermission is hereby granted, free of charge, to any person
120
+ obtaining\na copy of this software and associated documentation files
121
+ (the\n\"Software\"), to deal in the Software without restriction,
122
+ including\nwithout limitation the rights to use, copy, modify, merge,
123
+ publish,\ndistribute, sublicense, and/or sell copies of the Software,
124
+ and to\npermit persons to whom the Software is furnished to do so,
125
+ subject to\nthe following conditions:\n\nThe above copyright notice and
126
+ this permission notice shall be\nincluded in all copies or substantial
127
+ portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT
128
+ WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
129
+ THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
130
+ AND\nNONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
131
+ BE\nLIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
132
+ ACTION\nOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
133
+ CONNECTION\nWITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
134
+ SOFTWARE.\n",
135
+ "commit_sha"=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"}}}>
136
+ ```
137
+
138
+ ## Examples
139
+
140
+ After integrating this gem into [Gitlab](https://github.com/gitlabhq/gitlabhq)
141
+
142
+ ``` ruby
143
+ Repository.search("too")[:commits].first
144
+ => #<Elasticsearch::Model::Response::Result:0xbb50dfdc
145
+ @result=
146
+ {"_index"=>"repository-index-development",
147
+ "_type"=>"repository",
148
+ "_id"=>"4328_1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
149
+ "_score"=>0.15873253,
150
+ "_source"=>
151
+ {"commit"=>
152
+ {"type"=>"commit",
153
+ "rid"=>4328,
154
+ "sha"=>"1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
155
+ "author"=>
156
+ {"name"=>"Andrey Kumanyaev",
157
+ "email"=>"me@zzet.org",
158
+ "time"=>"2014-02-16T02:24:23+04:00"},
159
+ "committer"=>
160
+ {"name"=>"Andrey Kumanyaev",
161
+ "email"=>"me@zzet.org",
162
+ "time"=>"2014-02-16T02:24:23+04:00"},
163
+ "message"=>"Save 2. Indexing work. Search too\n"}}}>
164
+
165
+
166
+ Project.last.repository.as_indexed_json
167
+ Project Load (1.7ms) SELECT "projects".* FROM "projects" ORDER BY "projects"."id" DESC LIMIT 1
168
+ Namespace Load (4.8ms) SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = $1 ORDER BY "namespaces"."id" ASC LIMIT 1 [["id", 3739]]
169
+ Namespace Load (0.9ms) SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."path" = 'zzet' LIMIT 1
170
+ Project Load (0.7ms) SELECT "projects".* FROM "projects" WHERE "projects"."namespace_id" = 3739 AND "projects"."path" = 'elasticsearch-git' LIMIT 1
171
+ # Long lines have been truncated manually
172
+ => {:blobs=>
173
+ [[{:type=>"blob",
174
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_.gitignore",
175
+ :rid=>4328,
176
+ :oid=>"d87d4be66f458acd52878902bbf1391732ad21e1",
177
+ :content=>
178
+ "*.gem\n*.rbc\n.bundle\n.config\n.yardoc\nGemfile.lock\nInstalledFiles\n_yardoc\ncoverage\ndoc/\nlib/bundler/man\npkg\nrdoc\nspec/reports\ntest/tmp\ntest/version_tmp\ntmp\n",
179
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
180
+ {:type=>"blob",
181
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_Gemfile",
182
+ :rid=>4328,
183
+ :oid=>"7322405f8f3ee5de24f7a727940ac52543e8954c",
184
+ :content=>
185
+ "source 'https://rubygems.org'\n\n# Specify your gem's dependencies in elasticsearch-git.gemspec\ngemspec\n\ngem 'elasticsearch-model', github: 'elasticsearch/elasticsearch-rails'\ngem 'elasticsearc....."
186
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
187
+ {:type=>"blob",
188
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_LICENSE.txt",
189
+ :rid=>4328,
190
+ :oid=>"f99909cd4ecb6f2ad08f8e55aac3a9fcd86a2bd2",
191
+ :content=>
192
+ "Copyright (c) 2014 Andrey Kumanyaev\n\nMIT License\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n\"Softw...."
193
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
194
+ {:type=>"blob",
195
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_README.md",
196
+ :rid=>4328,
197
+ :oid=>"8258d574dfc8040a5d003f06c6493e0033527f36",
198
+ :content=>
199
+ "# Elasticsearch::Git\n\nAttention: Pre-pre-pre beta code. Not production.\n\n[Elasticsearch](https://github.com/elasticsearch/elasticsearch-rails/tree/master/elasticsearch-model) integrations for g...."
200
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
201
+ {:type=>"blob",
202
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_Rakefile",
203
+ :rid=>4328,
204
+ :oid=>"29955274e0d42e164337c411ad9144e8ffd7e46e",
205
+ :content=>"require \"bundler/gem_tasks\"\n",
206
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
207
+ {:type=>"blob",
208
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_elasticsearch-git.gemspec",
209
+ :rid=>4328,
210
+ :oid=>"67762437568dda1bb98ec5eca8be7e4a5c8115a9",
211
+ :content=>
212
+ "# coding: utf-8\nlib = File.expand_path('../lib', __FILE__)\n$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)\nrequire 'elasticsearch/git/version'\n\nGem::Specification.new do |spec|\n spec..."
213
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
214
+ {:type=>"blob",
215
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_elasticsearch/git.rb",
216
+ :rid=>4328,
217
+ :oid=>"d3817ec58af1f44dfd18856bf54ef2bf607901a8",
218
+ :content=>
219
+ "require \"elasticsearch/git/version\"\nrequire \"elasticsearch/git/model\"\nrequire \"elasticsearch/git/commit\"\n\nmodule Elasticsearch\n module Git\n class Test\n include Elasticsearch::..."
220
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
221
+ {:type=>"blob",
222
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/model.rb",
223
+ :rid=>4328,
224
+ :oid=>"3dfbae747f25391779fbe012fe8cc4f38cc4651c",
225
+ :content=>
226
+ "require 'active_support/concern'\nrequire 'active_model'\nrequire 'elasticsearch/model'\n\nmodule Elasticsearch\n module Git\n module Model\n extend ActiveSupport::Concern\n\n include..."
227
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
228
+ {:type=>"blob",
229
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/repository.rb",
230
+ :rid=>4328,
231
+ :oid=>"70fe59c8391f6c27adb79c3e45824e6b4cf9566c",
232
+ :content=>
233
+ "require 'active_support/concern'\nrequire 'active_model'\nrequire 'elasticsearch'\nrequire 'elasticsearch/model'\nrequire 'rugged'\nrequire 'gitlab_git'\n\nmodule Elasticsearch\n module Git\n m..."
234
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
235
+ {:type=>"blob",
236
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/version.rb",
237
+ :rid=>4328,
238
+ :oid=>"79e8082b122492464732f1fb43e9f2bdc96ea146",
239
+ :content=>
240
+ "module Elasticsearch\n module Git\n VERSION = \"0.0.1\"\n end\nend\n",
241
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
242
+ {:type=>"blob",
243
+ :id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_test/test_helper.rb",
244
+ :rid=>4328,
245
+ :oid=>"6acc0d2b7bf0f286557d3757c1140b41ab57e8f7",
246
+ :content=>
247
+ "require \"rubygems\"\nrequire 'bundler/setup'\nrequire 'pry'\n\nBundler.require\n\nrequire 'wrong/adapters/minitest'\n\nPROJECT_ROOT = File.join(Dir.pwd)\n\nWrong.config.color\n\nMinitest.autorun\n..."
248
+ :commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"}]],
249
+ :commits=>
250
+ [{:type=>"commit",
251
+ :sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28",
252
+ :author=>
253
+ {:name=>"Andrey Kumanyaev",
254
+ :email=>"me@zzet.org",
255
+ :time=>2014-02-16 13:50:32 +0400},
256
+ :committer=>
257
+ {:name=>"Andrey Kumanyaev",
258
+ :email=>"me@zzet.org",
259
+ :time=>2014-02-16 13:50:32 +0400},
260
+ :message=>"Improve readme\n"},
261
+ {:type=>"commit",
262
+ :sha=>"37f1b0710eb7f41254ae0c33db09794a25bbb246",
263
+ :author=>
264
+ {:name=>"Andrey Kumanyaev",
265
+ :email=>"me@zzet.org",
266
+ :time=>2014-02-16 13:49:25 +0400},
267
+ :committer=>
268
+ {:name=>"Andrey Kumanyaev",
269
+ :email=>"me@zzet.org",
270
+ :time=>2014-02-16 13:49:25 +0400},
271
+ :message=>"prepare first test release\n"},
272
+ {:type=>"commit",
273
+ :sha=>"1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
274
+ :author=>
275
+ {:name=>"Andrey Kumanyaev",
276
+ :email=>"me@zzet.org",
277
+ :time=>2014-02-16 02:24:23 +0400},
278
+ :committer=>
279
+ {:name=>"Andrey Kumanyaev",
280
+ :email=>"me@zzet.org",
281
+ :time=>2014-02-16 02:24:23 +0400},
282
+ :message=>"Save 2. Indexing work. Search too\n"},
283
+ {:type=>"commit",
284
+ :sha=>"3ed383bfbf6cba611d191dbc3590779c0444b7f0",
285
+ :author=>
286
+ {:name=>"Andrey Kumanyaev",
287
+ :email=>"me@zzet.org",
288
+ :time=>2014-02-16 00:23:10 +0400},
289
+ :committer=>
290
+ {:name=>"Andrey Kumanyaev",
291
+ :email=>"me@zzet.org",
292
+ :time=>2014-02-16 00:23:10 +0400},
293
+ :message=>"Save commit\n"},
294
+ {:type=>"commit",
295
+ :sha=>"7021addf520a19bdeceef29947c8687965c132ff",
296
+ :author=>
297
+ {:name=>"Andrey Kumanyaev",
298
+ :email=>"me@zzet.org",
299
+ :time=>2014-02-15 14:28:43 +0400},
300
+ :committer=>
301
+ {:name=>"Andrey Kumanyaev",
302
+ :email=>"me@zzet.org",
303
+ :time=>2014-02-15 14:28:43 +0400},
304
+ :message=>"first commit\n"}]}
305
+ ```
306
+
51
307
  ## Contributing
52
308
 
53
309
  1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
@@ -21,7 +21,8 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency 'elasticsearch-model'
22
22
  spec.add_runtime_dependency 'elasticsearch-api'
23
23
  spec.add_runtime_dependency 'rugged'
24
- spec.add_runtime_dependency 'gitlab_git'
24
+ spec.add_runtime_dependency 'charlock_holmes'
25
+ spec.add_runtime_dependency 'gitlab-linguist'
25
26
  spec.add_runtime_dependency 'activemodel'
26
27
  spec.add_runtime_dependency 'activesupport'
27
28
  end
@@ -1,12 +1,9 @@
1
1
  require "elasticsearch/git/version"
2
2
  require "elasticsearch/git/model"
3
- require "elasticsearch/git/commit"
3
+ require "elasticsearch/git/repository"
4
4
 
5
5
  module Elasticsearch
6
6
  module Git
7
- class Test
8
- include Elasticsearch::Git::Model
9
- end
10
7
  end
11
8
  end
12
9
 
@@ -12,7 +12,13 @@ module Elasticsearch
12
12
  include ActiveModel::Model
13
13
  include Elasticsearch::Model
14
14
 
15
- index_name [self.name.downcase, 'index', Rails.env.to_s].join('-')
15
+ env = if defined?(::Rails)
16
+ ::Rails.env.to_s
17
+ else
18
+ "undefined"
19
+ end
20
+
21
+ index_name [self.name.downcase, 'index', env].join('-')
16
22
 
17
23
  settings \
18
24
  index: {
@@ -3,7 +3,7 @@ require 'active_model'
3
3
  require 'elasticsearch'
4
4
  require 'elasticsearch/model'
5
5
  require 'rugged'
6
- require 'gitlab_git'
6
+ require 'linguist'
7
7
 
8
8
  module Elasticsearch
9
9
  module Git
@@ -14,14 +14,16 @@ module Elasticsearch
14
14
  include Elasticsearch::Git::Model
15
15
 
16
16
  mapping do
17
- indexes :blobs do
17
+ indexes :blob do
18
18
  indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
19
+ indexes :rid, type: :string, index: :not_analyzed
19
20
  indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
20
21
  indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
21
22
  indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
22
23
  end
23
- indexes :commits do
24
+ indexes :commit do
24
25
  indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
26
+ indexes :rid, type: :string, index: :not_analyzed
25
27
  indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
26
28
  indexes :author do
27
29
  indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
@@ -51,23 +53,89 @@ module Elasticsearch
51
53
  # }
52
54
  #
53
55
  # For search from blobs use type 'blob'
54
- def index_blobs
55
- target_sha = repository_for_indexing.head.target
56
- repository_for_indexing.index.each do |blob|
56
+ def index_blobs(from_rev: nil, to_rev: nil)
57
+
58
+ if to_rev.present?
59
+ begin
60
+ raise unless repository_for_indexing.lookup(to_rev).type == :commit
61
+ rescue
62
+ raise ArgumentError, "'to_rev': '#{to_rev}' is a incorrect commit sha."
63
+ end
64
+ else
65
+ to_rev = repository_for_indexing.head.target
66
+ end
67
+
68
+ target_sha = to_rev
69
+
70
+ if from_rev.present?
71
+ begin
72
+ raise unless repository_for_indexing.lookup(from_rev).type == :commit
73
+ rescue
74
+ raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha."
75
+ end
76
+
77
+ diff = repository_for_indexing.diff(from_rev, to_rev)
78
+ diff.deltas.reverse.each do |delta|
79
+ if delta.status == :deleted
80
+ b = LiteBlob.new(repository_for_indexing, delta.old_file)
81
+ delete_from_index_blob(b)
82
+ else
83
+ b = LiteBlob.new(repository_for_indexing, delta.new_file)
84
+ index_blob(b, target_sha)
85
+ end
86
+ end
87
+ else
88
+ if repository_for_indexing.bare?
89
+ recurse_blobs_index(repository_for_indexing.lookup(target_sha).tree, target_sha)
90
+ else
91
+ repository_for_indexing.index.each do |blob|
92
+ b = LiteBlob.new(repository_for_indexing, blob)
93
+ index_blob(b, target_sha)
94
+ end
95
+ end
96
+ end
97
+ end
98
+
99
+ # Index a bare repository by walking through its tree
100
+ def recurse_blobs_index(tree, target_sha, path = "")
101
+ tree.each_blob do |blob|
102
+ blob[:path] = path + blob[:name]
57
103
  b = LiteBlob.new(repository_for_indexing, blob)
58
- if b.text?
59
- client_for_indexing.index \
60
- index: "#{self.class.index_name}",
61
- type: "blob",
62
- id: "#{repository_id}_#{b.path}",
63
- body: {
64
- blob: {
65
- oid: b.id,
66
- rid: repository_id,
67
- content: b.data,
68
- commit_sha: target_sha
69
- }
104
+ index_blob(b, target_sha)
105
+ end
106
+
107
+ tree.each_tree do |nested_tree|
108
+ recurse_blobs_index(repository_for_indexing.lookup(nested_tree[:oid]), target_sha, "#{path}#{nested_tree[:name]}/")
109
+ end
110
+ end
111
+
112
+ def index_blob(blob, target_sha)
113
+ if blob.text?
114
+ client_for_indexing.index \
115
+ index: "#{self.class.index_name}",
116
+ type: "repository",
117
+ id: "#{repository_id}_#{blob.path}",
118
+ body: {
119
+ blob: {
120
+ type: "blob",
121
+ oid: blob.id,
122
+ rid: repository_id,
123
+ content: blob.data,
124
+ commit_sha: target_sha
70
125
  }
126
+ }
127
+ end
128
+ end
129
+
130
+ def delete_from_index_blob(blob)
131
+ if blob.text?
132
+ begin
133
+ client_for_indexing.delete \
134
+ index: "#{self.class.index_name}",
135
+ type: "repository",
136
+ id: "#{repository_id}_#{blob.path}"
137
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
138
+ return true
71
139
  end
72
140
  end
73
141
  end
@@ -94,27 +162,45 @@ module Elasticsearch
94
162
  # }
95
163
  #
96
164
  # For search from commits use type 'commit'
97
- def index_commits
98
- repository_for_indexing.each_id do |oid|
99
- obj = repository_for_indexing.lookup(oid)
100
- if obj.type == :commit
101
- client_for_indexing.index \
102
- index: "#{self.class.index_name}",
103
- type: "commit",
104
- id: "#{repository_id}_#{obj.oid}",
105
- body: {
106
- commit: {
107
- rid: repository_id,
108
- sha: obj.oid,
109
- author: obj.author,
110
- committer: obj.committer,
111
- message: obj.message
112
- }
113
- }
165
+ def index_commits(from_rev: nil, to_rev: nil)
166
+ if from_rev.present? && to_rev.present?
167
+ begin
168
+ raise unless repository_for_indexing.lookup(from_rev).type == :commit
169
+ raise unless repository_for_indexing.lookup(from_rev).type == :commit
170
+ rescue
171
+ raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha."
172
+ end
173
+
174
+ repository_for_indexing.walk(from_rev, to_rev).each do |commit|
175
+ index_commit(commit)
176
+ end
177
+ else
178
+ repository_for_indexing.each_id do |oid|
179
+ obj = repository_for_indexing.lookup(oid)
180
+ if obj.type == :commit
181
+ index_commit(obj)
182
+ end
114
183
  end
115
184
  end
116
185
  end
117
186
 
187
+ def index_commit(commit)
188
+ client_for_indexing.index \
189
+ index: "#{self.class.index_name}",
190
+ type: "repository",
191
+ id: "#{repository_id}_#{commit.oid}",
192
+ body: {
193
+ commit: {
194
+ type: "commit",
195
+ rid: repository_id,
196
+ sha: commit.oid,
197
+ author: commit.author,
198
+ committer: commit.committer,
199
+ message: commit.message
200
+ }
201
+ }
202
+ end
203
+
118
204
  # Representation of repository as indexed json
119
205
  # Attention: It can be very very very huge hash
120
206
  def as_indexed_json(options = {})
@@ -129,19 +215,52 @@ module Elasticsearch
129
215
  result = []
130
216
 
131
217
  target_sha = repository_for_indexing.head.target
132
- repository_for_indexing.index.each do |blob|
133
- b = EasyBlob.new(repository_for_indexing, blob)
218
+
219
+ if repository_for_indexing.bare?
220
+ tree = repository_for_indexing.lookup(target_sha).tree
221
+ result.push(recurse_blobs_index_hash(tree))
222
+ else
223
+ repository_for_indexing.index.each do |blob|
224
+ b = EasyBlob.new(repository_for_indexing, blob)
225
+ result.push(
226
+ {
227
+ type: 'blob',
228
+ id: "#{target_sha}_#{b.path}",
229
+ rid: repository_id,
230
+ oid: b.id,
231
+ content: b.data,
232
+ commit_sha: target_sha
233
+ }
234
+ ) if b.text?
235
+ end
236
+ end
237
+
238
+ result
239
+ end
240
+
241
+ def recurse_blobs_index_hash(tree, path = "")
242
+ result = []
243
+
244
+ tree.each_blob do |blob|
245
+ blob[:path] = path + blob[:name]
246
+ b = LiteBlob.new(repository_for_indexing, blob)
134
247
  result.push(
135
248
  {
136
- id: "#{target_sha}_#{b.path}",
249
+ type: 'blob',
250
+ id: "#{repository_for_indexing.head.target}_#{path}#{blob[:name]}",
251
+ rid: repository_id,
137
252
  oid: b.id,
138
253
  content: b.data,
139
- commit_sha: target_sha
254
+ commit_sha: repository_for_indexing.head.target
140
255
  }
141
256
  ) if b.text?
142
257
  end
143
258
 
144
- result
259
+ tree.each_tree do |nested_tree|
260
+ result.push(recurse_blobs_index_hash(repository_for_indexing.lookup(nested_tree[:oid]), "#{nested_tree[:name]}/"))
261
+ end
262
+
263
+ result.flatten
145
264
  end
146
265
 
147
266
  # Lookup all object ids for commit objects
@@ -153,6 +272,7 @@ module Elasticsearch
153
272
  if obj.type == :commit
154
273
  res.push(
155
274
  {
275
+ type: 'commit',
156
276
  sha: obj.oid,
157
277
  author: obj.author,
158
278
  committer: obj.committer,
@@ -165,27 +285,141 @@ module Elasticsearch
165
285
  res
166
286
  end
167
287
 
288
+ def search(query, type: :all, page: 1, per: 20, options: {})
289
+ options[:repository_id] = repository_id if options[:repository_id].nil?
290
+ self.class.search(query, type: type, page: page, per: per, options: options)
291
+ end
292
+
168
293
  # Repository id used for identity data from different repositories
169
294
  # Update this value if need
170
- def set_repository_id id
295
+ def set_repository_id id = nil
171
296
  @repository_id = id || path_to_repo
172
297
  end
173
298
 
299
+ # Intended to be overridden by the including class
300
+ def repository_id
301
+ @repository_id
302
+ end
303
+
304
+ unless defined?(path_to_repo)
305
+ def path_to_repo
306
+ if @path_to_repo.blank?
307
+ raise NotImplementedError, 'Please, define "path_to_repo" method, or set "path_to_repo" via "repository_for_indexing" method'
308
+ else
309
+ @path_to_repo
310
+ end
311
+ end
312
+ end
313
+
174
314
  def repository_for_indexing(repo_path = "")
175
315
  @path_to_repo ||= repo_path
316
+ set_repository_id
176
317
  Rugged::Repository.new(@path_to_repo)
177
318
  end
178
319
 
179
320
  def client_for_indexing
180
321
  @client_for_indexing ||= Elasticsearch::Client.new log: true
181
322
  end
323
+ end
324
+
325
+ module ClassMethods
326
+ def search(query, type: :all, page: 1, per: 20, options: {})
327
+ results = { blobs: [], commits: []}
328
+ case type.to_sym
329
+ when :all
330
+ results[:blobs] = search_blob(query, page: page, per: per, options: options)
331
+ results[:commits] = search_commit(query, page: page, per: per, options: options)
332
+ when :blob
333
+ results[:blobs] = search_blob(query, page: page, per: per, options: options)
334
+ when :commit
335
+ results[:commits] = search_commit(query, page: page, per: per, options: options)
336
+ end
337
+
338
+ results
339
+ end
340
+
341
+ def search_commit(query, page: 1, per: 20, options: {})
342
+ page ||= 1
343
+
344
+ fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map {|i| "commit.#{i}"}
345
+
346
+ query_hash = {
347
+ query: {
348
+ filtered: {
349
+ query: {
350
+ multi_match: {
351
+ fields: fields,
352
+ query: "#{query}",
353
+ operator: :and
354
+ }
355
+ },
356
+ },
357
+ },
358
+ size: per,
359
+ from: per * (page - 1)
360
+ }
361
+
362
+ if query.blank?
363
+ query_hash[:query][:filtered][:query] = { match_all: {}}
364
+ query_hash[:track_scores] = true
365
+ end
366
+
367
+ if options[:repository_id]
368
+ query_hash[:query][:filtered][:filter] ||= { and: [] }
369
+ query_hash[:query][:filtered][:filter][:and] << {
370
+ terms: {
371
+ "commit.rid" => [options[:repository_id]].flatten
372
+ }
373
+ }
374
+ end
375
+
376
+ if options[:highlight]
377
+ query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
378
+ end
379
+
380
+ self.__elasticsearch__.search(query_hash).results
381
+ end
182
382
 
383
+ def search_blob(query, type: :all, page: 1, per: 20, options: {})
384
+ page ||= 1
385
+
386
+ query_hash = {
387
+ query: {
388
+ filtered: {
389
+ query: {
390
+ match: {
391
+ 'blob.content' => {
392
+ query: "#{query}",
393
+ operator: :and
394
+ }
395
+ }
396
+ }
397
+ }
398
+ },
399
+ size: per,
400
+ from: per * (page - 1)
401
+ }
402
+
403
+ if options[:repository_id]
404
+ query_hash[:query][:filtered][:filter] ||= { and: [] }
405
+ query_hash[:query][:filtered][:filter][:and] << {
406
+ terms: {
407
+ "blob.rid" => [options[:repository_id]].flatten
408
+ }
409
+ }
410
+ end
411
+
412
+ if options[:highlight]
413
+ query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
414
+ end
415
+
416
+ self.__elasticsearch__.search(query_hash).results
417
+ end
183
418
  end
184
419
  end
185
420
 
186
421
  class LiteBlob
187
422
  include Linguist::BlobHelper
188
- include EncodingHelper
189
423
 
190
424
  attr_accessor :id, :name, :path, :data, :commit_id
191
425
 
@@ -195,6 +429,37 @@ module Elasticsearch
195
429
  @name = @path.split("/").last
196
430
  @data = encode!(repo.lookup(@id).content)
197
431
  end
432
+
433
+ def encode!(message)
434
+ return nil unless message.respond_to? :force_encoding
435
+
436
+ # if message is utf-8 encoding, just return it
437
+ message.force_encoding("UTF-8")
438
+ return message if message.valid_encoding?
439
+
440
+ # return message if message type is binary
441
+ detect = CharlockHolmes::EncodingDetector.detect(message)
442
+ return message.force_encoding("BINARY") if detect && detect[:type] == :binary
443
+
444
+ # tag the message with the detected encoding
445
+ if detect && detect[:encoding]
446
+ message.force_encoding(detect[:encoding])
447
+ end
448
+
449
+ # encode and clean the bad chars
450
+ message.replace clean(message)
451
+ rescue
452
+ encoding = detect ? detect[:encoding] : "unknown"
453
+ "--broken encoding: #{encoding}"
454
+ end
455
+
456
+ private
457
+
458
+ def clean(message)
459
+ message.encode("UTF-16BE", :undef => :replace, :invalid => :replace, :replace => "")
460
+ .encode("UTF-8")
461
+ .gsub("\0".encode("UTF-8"), "")
462
+ end
198
463
  end
199
464
  end
200
465
  end
@@ -1,5 +1,5 @@
1
1
  module Elasticsearch
2
2
  module Git
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch-git
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrey Kumanyaev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-16 00:00:00.000000000 Z
11
+ date: 2014-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch-model
@@ -53,7 +53,21 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: gitlab_git
56
+ name: charlock_holmes
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: gitlab-linguist
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - '>='