elasticsearch-git 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -1
- data/README.md +258 -2
- data/elasticsearch-git.gemspec +2 -1
- data/lib/elasticsearch/git.rb +1 -4
- data/lib/elasticsearch/git/model.rb +7 -1
- data/lib/elasticsearch/git/repository.rb +307 -42
- data/lib/elasticsearch/git/version.rb +1 -1
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 743ab16ca2c38bbe9ab23dc7447c5c41d578f415
|
4
|
+
data.tar.gz: 4fba8f5da92706100fa6b01e582cbcd16a83d5ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efe576c9de405aa4b22c89cfe21171d6de7ac5e686ed4207e73e085cf83d673b114dced51cc6ecb051b6f38aeba9227bf45619cd8f65b7a228101a703d250599
|
7
|
+
data.tar.gz: 2e18154c17713a627d3d4b9519f279348bcdccd0779819e7164f1d01e9c62092dc8c5c408b34782e2f46f6c8778a8c52fa75a3985537fcfb30baa36c5088cd66
|
data/Gemfile
CHANGED
@@ -9,7 +9,8 @@ gem 'rugged', git: 'git://github.com/libgit2/rugged.git', branch: 'development',
|
|
9
9
|
gem 'bundler'
|
10
10
|
gem 'rake'
|
11
11
|
gem 'pry'
|
12
|
-
gem '
|
12
|
+
gem 'charlock_holmes', '~> 0.6.9'
|
13
|
+
gem 'gitlab-linguist', '~> 3.0.0'
|
13
14
|
gem 'minitest'
|
14
15
|
gem 'activesupport', '> 4.0.0'
|
15
16
|
gem 'activemodel', '> 4.0.0'
|
data/README.md
CHANGED
@@ -30,9 +30,11 @@ $ gem install elasticsearch-git
|
|
30
30
|
class Repository
|
31
31
|
include Elasticsearch::Git::Repository
|
32
32
|
|
33
|
-
|
34
|
-
|
33
|
+
def repository_id
|
34
|
+
project.id
|
35
|
+
end
|
35
36
|
|
37
|
+
repository_for_indexing '/path/to/your/repo'
|
36
38
|
end
|
37
39
|
|
38
40
|
Repository.__elasticsearch__.create_index! force: true
|
@@ -41,6 +43,9 @@ repo = Repository.new
|
|
41
43
|
repo.index_commits
|
42
44
|
repo.index_blobs
|
43
45
|
|
46
|
+
repo.index_commits(from_rev: "1802bafa70d3b1678cfa46a482fd396dd8a4bd40", to_rev: "8d4175e9f4a36065b52fa752c1fd3594c82c0f28")
|
47
|
+
repo.index_blobs(from_rev: "1802bafa70d3b1678cfa46a482fd396dd8a4bd40", to_rev: "8d4175e9f4a36065b52fa752c1fd3594c82c0f28")
|
48
|
+
|
44
49
|
Repository.search("query", type: 'blob')
|
45
50
|
Repository.search("query", type: 'commit')
|
46
51
|
|
@@ -48,6 +53,257 @@ Repository.search("query", type: 'commit')
|
|
48
53
|
Repository.search("query")
|
49
54
|
```
|
50
55
|
|
56
|
+
## Integration with Gitlab
|
57
|
+
|
58
|
+
``` ruby
|
59
|
+
# app/elastic/repositories_search.rb
|
60
|
+
module RepositoriesSearch
|
61
|
+
extend ActiveSupport::Concern
|
62
|
+
|
63
|
+
included do
|
64
|
+
include Elasticsearch::Git::Repository
|
65
|
+
|
66
|
+
def repository_id
|
67
|
+
project.id
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
# app/models/repository.rb
|
74
|
+
class Repository
|
75
|
+
include RepositoriesSearch
|
76
|
+
#...
|
77
|
+
def project
|
78
|
+
@project ||= Project.find_with_namespace(@path_with_namespace)
|
79
|
+
end
|
80
|
+
#...
|
81
|
+
end
|
82
|
+
|
83
|
+
Project.last.repository.__elasticsearch__.create_index! force: true
|
84
|
+
Project.last.repository.index_commits
|
85
|
+
Project.last.repository.index_blobs
|
86
|
+
|
87
|
+
Repository.search("some_query")
|
88
|
+
# => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
|
89
|
+
|
90
|
+
Repository.search("some_query", type: :blob)
|
91
|
+
# => {blobs: [{}, {}, {}], commits: []}
|
92
|
+
|
93
|
+
Repository.search("some_query", type: :commit)
|
94
|
+
# => {blobs: [], commits: [{}, {}, {}]}
|
95
|
+
|
96
|
+
Repository.search("some_query", type: :commit, page: 2, per: 50)
|
97
|
+
# => ...
|
98
|
+
|
99
|
+
Repository.search("some_query", options: { repository_id: Project.last.id })
|
100
|
+
# => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
|
101
|
+
|
102
|
+
Repository.search("some_query", options: { repository_id: current_user.authorized_projects.ids })
|
103
|
+
# => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
|
104
|
+
|
105
|
+
Project.last.repository.search("Copyright")[:blobs].first
|
106
|
+
=> #<Elasticsearch::Model::Response::Result:0xbb84b3fc
|
107
|
+
@result=
|
108
|
+
{"_index"=>"repository-index-development",
|
109
|
+
"_type"=>"repository",
|
110
|
+
"_id"=>"4328_LICENSE.txt",
|
111
|
+
"_score"=>0.034848917,
|
112
|
+
"_source"=>
|
113
|
+
{"blob"=>
|
114
|
+
{"type"=>"blob",
|
115
|
+
"oid"=>"f99909cd4ecb6f2ad08f8e55aac3a9fcd86a2bd2",
|
116
|
+
"rid"=>4328,
|
117
|
+
"content"=>
|
118
|
+
"Copyright (c) 2014 Andrey Kumanyaev\n\nMIT
|
119
|
+
License\n\nPermission is hereby granted, free of charge, to any person
|
120
|
+
obtaining\na copy of this software and associated documentation files
|
121
|
+
(the\n\"Software\"), to deal in the Software without restriction,
|
122
|
+
including\nwithout limitation the rights to use, copy, modify, merge,
|
123
|
+
publish,\ndistribute, sublicense, and/or sell copies of the Software,
|
124
|
+
and to\npermit persons to whom the Software is furnished to do so,
|
125
|
+
subject to\nthe following conditions:\n\nThe above copyright notice and
|
126
|
+
this permission notice shall be\nincluded in all copies or substantial
|
127
|
+
portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT
|
128
|
+
WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
|
129
|
+
THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
|
130
|
+
AND\nNONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
131
|
+
BE\nLIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
132
|
+
ACTION\nOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
133
|
+
CONNECTION\nWITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
134
|
+
SOFTWARE.\n",
|
135
|
+
"commit_sha"=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"}}}>
|
136
|
+
```
|
137
|
+
|
138
|
+
## Examples
|
139
|
+
|
140
|
+
After integrating this gem into [GitLab](https://github.com/gitlabhq/gitlabhq):
|
141
|
+
|
142
|
+
``` ruby
|
143
|
+
Repository.search("too")[:commits].first
|
144
|
+
=> #<Elasticsearch::Model::Response::Result:0xbb50dfdc
|
145
|
+
@result=
|
146
|
+
{"_index"=>"repository-index-development",
|
147
|
+
"_type"=>"repository",
|
148
|
+
"_id"=>"4328_1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
|
149
|
+
"_score"=>0.15873253,
|
150
|
+
"_source"=>
|
151
|
+
{"commit"=>
|
152
|
+
{"type"=>"commit",
|
153
|
+
"rid"=>4328,
|
154
|
+
"sha"=>"1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
|
155
|
+
"author"=>
|
156
|
+
{"name"=>"Andrey Kumanyaev",
|
157
|
+
"email"=>"me@zzet.org",
|
158
|
+
"time"=>"2014-02-16T02:24:23+04:00"},
|
159
|
+
"committer"=>
|
160
|
+
{"name"=>"Andrey Kumanyaev",
|
161
|
+
"email"=>"me@zzet.org",
|
162
|
+
"time"=>"2014-02-16T02:24:23+04:00"},
|
163
|
+
"message"=>"Save 2. Indexing work. Search too\n"}}}>
|
164
|
+
|
165
|
+
|
166
|
+
Project.last.repository.as_indexed_json
|
167
|
+
Project Load (1.7ms) SELECT "projects".* FROM "projects" ORDER BY "projects"."id" DESC LIMIT 1
|
168
|
+
Namespace Load (4.8ms) SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = $1 ORDER BY "namespaces"."id" ASC LIMIT 1 [["id", 3739]]
|
169
|
+
Namespace Load (0.9ms) SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."path" = 'zzet' LIMIT 1
|
170
|
+
Project Load (0.7ms) SELECT "projects".* FROM "projects" WHERE "projects"."namespace_id" = 3739 AND "projects"."path" = 'elasticsearch-git' LIMIT 1
|
171
|
+
# Long lines are stripped manually
|
172
|
+
=> {:blobs=>
|
173
|
+
[[{:type=>"blob",
|
174
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_.gitignore",
|
175
|
+
:rid=>4328,
|
176
|
+
:oid=>"d87d4be66f458acd52878902bbf1391732ad21e1",
|
177
|
+
:content=>
|
178
|
+
"*.gem\n*.rbc\n.bundle\n.config\n.yardoc\nGemfile.lock\nInstalledFiles\n_yardoc\ncoverage\ndoc/\nlib/bundler/man\npkg\nrdoc\nspec/reports\ntest/tmp\ntest/version_tmp\ntmp\n",
|
179
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
180
|
+
{:type=>"blob",
|
181
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_Gemfile",
|
182
|
+
:rid=>4328,
|
183
|
+
:oid=>"7322405f8f3ee5de24f7a727940ac52543e8954c",
|
184
|
+
:content=>
|
185
|
+
"source 'https://rubygems.org'\n\n# Specify your gem's dependencies in elasticsearch-git.gemspec\ngemspec\n\ngem 'elasticsearch-model', github: 'elasticsearch/elasticsearch-rails'\ngem 'elasticsearc....."
|
186
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
187
|
+
{:type=>"blob",
|
188
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_LICENSE.txt",
|
189
|
+
:rid=>4328,
|
190
|
+
:oid=>"f99909cd4ecb6f2ad08f8e55aac3a9fcd86a2bd2",
|
191
|
+
:content=>
|
192
|
+
"Copyright (c) 2014 Andrey Kumanyaev\n\nMIT License\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n\"Softw...."
|
193
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
194
|
+
{:type=>"blob",
|
195
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_README.md",
|
196
|
+
:rid=>4328,
|
197
|
+
:oid=>"8258d574dfc8040a5d003f06c6493e0033527f36",
|
198
|
+
:content=>
|
199
|
+
"# Elasticsearch::Git\n\nAttention: Pre-pre-pre beta code. Not production.\n\n[Elasticsearch](https://github.com/elasticsearch/elasticsearch-rails/tree/master/elasticsearch-model) integrations for g...."
|
200
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
201
|
+
{:type=>"blob",
|
202
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_Rakefile",
|
203
|
+
:rid=>4328,
|
204
|
+
:oid=>"29955274e0d42e164337c411ad9144e8ffd7e46e",
|
205
|
+
:content=>"require \"bundler/gem_tasks\"\n",
|
206
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
207
|
+
{:type=>"blob",
|
208
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_elasticsearch-git.gemspec",
|
209
|
+
:rid=>4328,
|
210
|
+
:oid=>"67762437568dda1bb98ec5eca8be7e4a5c8115a9",
|
211
|
+
:content=>
|
212
|
+
"# coding: utf-8\nlib = File.expand_path('../lib', __FILE__)\n$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)\nrequire 'elasticsearch/git/version'\n\nGem::Specification.new do |spec|\n spec..."
|
213
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
214
|
+
{:type=>"blob",
|
215
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_elasticsearch/git.rb",
|
216
|
+
:rid=>4328,
|
217
|
+
:oid=>"d3817ec58af1f44dfd18856bf54ef2bf607901a8",
|
218
|
+
:content=>
|
219
|
+
"require \"elasticsearch/git/version\"\nrequire \"elasticsearch/git/model\"\nrequire \"elasticsearch/git/commit\"\n\nmodule Elasticsearch\n module Git\n class Test\n include Elasticsearch::..."
|
220
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
221
|
+
{:type=>"blob",
|
222
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/model.rb",
|
223
|
+
:rid=>4328,
|
224
|
+
:oid=>"3dfbae747f25391779fbe012fe8cc4f38cc4651c",
|
225
|
+
:content=>
|
226
|
+
"require 'active_support/concern'\nrequire 'active_model'\nrequire 'elasticsearch/model'\n\nmodule Elasticsearch\n module Git\n module Model\n extend ActiveSupport::Concern\n\n include..."
|
227
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
228
|
+
{:type=>"blob",
|
229
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/repository.rb",
|
230
|
+
:rid=>4328,
|
231
|
+
:oid=>"70fe59c8391f6c27adb79c3e45824e6b4cf9566c",
|
232
|
+
:content=>
|
233
|
+
"require 'active_support/concern'\nrequire 'active_model'\nrequire 'elasticsearch'\nrequire 'elasticsearch/model'\nrequire 'rugged'\nrequire 'gitlab_git'\n\nmodule Elasticsearch\n module Git\n m..."
|
234
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
235
|
+
{:type=>"blob",
|
236
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/version.rb",
|
237
|
+
:rid=>4328,
|
238
|
+
:oid=>"79e8082b122492464732f1fb43e9f2bdc96ea146",
|
239
|
+
:content=>
|
240
|
+
"module Elasticsearch\n module Git\n VERSION = \"0.0.1\"\n end\nend\n",
|
241
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
242
|
+
{:type=>"blob",
|
243
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_test/test_helper.rb",
|
244
|
+
:rid=>4328,
|
245
|
+
:oid=>"6acc0d2b7bf0f286557d3757c1140b41ab57e8f7",
|
246
|
+
:content=>
|
247
|
+
"require \"rubygems\"\nrequire 'bundler/setup'\nrequire 'pry'\n\nBundler.require\n\nrequire 'wrong/adapters/minitest'\n\nPROJECT_ROOT = File.join(Dir.pwd)\n\nWrong.config.color\n\nMinitest.autorun\n..."
|
248
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"}]],
|
249
|
+
:commits=>
|
250
|
+
[{:type=>"commit",
|
251
|
+
:sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28",
|
252
|
+
:author=>
|
253
|
+
{:name=>"Andrey Kumanyaev",
|
254
|
+
:email=>"me@zzet.org",
|
255
|
+
:time=>2014-02-16 13:50:32 +0400},
|
256
|
+
:committer=>
|
257
|
+
{:name=>"Andrey Kumanyaev",
|
258
|
+
:email=>"me@zzet.org",
|
259
|
+
:time=>2014-02-16 13:50:32 +0400},
|
260
|
+
:message=>"Improve readme\n"},
|
261
|
+
{:type=>"commit",
|
262
|
+
:sha=>"37f1b0710eb7f41254ae0c33db09794a25bbb246",
|
263
|
+
:author=>
|
264
|
+
{:name=>"Andrey Kumanyaev",
|
265
|
+
:email=>"me@zzet.org",
|
266
|
+
:time=>2014-02-16 13:49:25 +0400},
|
267
|
+
:committer=>
|
268
|
+
{:name=>"Andrey Kumanyaev",
|
269
|
+
:email=>"me@zzet.org",
|
270
|
+
:time=>2014-02-16 13:49:25 +0400},
|
271
|
+
:message=>"prepare first test release\n"},
|
272
|
+
{:type=>"commit",
|
273
|
+
:sha=>"1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
|
274
|
+
:author=>
|
275
|
+
{:name=>"Andrey Kumanyaev",
|
276
|
+
:email=>"me@zzet.org",
|
277
|
+
:time=>2014-02-16 02:24:23 +0400},
|
278
|
+
:committer=>
|
279
|
+
{:name=>"Andrey Kumanyaev",
|
280
|
+
:email=>"me@zzet.org",
|
281
|
+
:time=>2014-02-16 02:24:23 +0400},
|
282
|
+
:message=>"Save 2. Indexing work. Search too\n"},
|
283
|
+
{:type=>"commit",
|
284
|
+
:sha=>"3ed383bfbf6cba611d191dbc3590779c0444b7f0",
|
285
|
+
:author=>
|
286
|
+
{:name=>"Andrey Kumanyaev",
|
287
|
+
:email=>"me@zzet.org",
|
288
|
+
:time=>2014-02-16 00:23:10 +0400},
|
289
|
+
:committer=>
|
290
|
+
{:name=>"Andrey Kumanyaev",
|
291
|
+
:email=>"me@zzet.org",
|
292
|
+
:time=>2014-02-16 00:23:10 +0400},
|
293
|
+
:message=>"Save commit\n"},
|
294
|
+
{:type=>"commit",
|
295
|
+
:sha=>"7021addf520a19bdeceef29947c8687965c132ff",
|
296
|
+
:author=>
|
297
|
+
{:name=>"Andrey Kumanyaev",
|
298
|
+
:email=>"me@zzet.org",
|
299
|
+
:time=>2014-02-15 14:28:43 +0400},
|
300
|
+
:committer=>
|
301
|
+
{:name=>"Andrey Kumanyaev",
|
302
|
+
:email=>"me@zzet.org",
|
303
|
+
:time=>2014-02-15 14:28:43 +0400},
|
304
|
+
:message=>"first commit\n"}]}
|
305
|
+
```
|
306
|
+
|
51
307
|
## Contributing
|
52
308
|
|
53
309
|
1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
|
data/elasticsearch-git.gemspec
CHANGED
@@ -21,7 +21,8 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_runtime_dependency 'elasticsearch-model'
|
22
22
|
spec.add_runtime_dependency 'elasticsearch-api'
|
23
23
|
spec.add_runtime_dependency 'rugged'
|
24
|
-
spec.add_runtime_dependency '
|
24
|
+
spec.add_runtime_dependency 'charlock_holmes'
|
25
|
+
spec.add_runtime_dependency 'gitlab-linguist'
|
25
26
|
spec.add_runtime_dependency 'activemodel'
|
26
27
|
spec.add_runtime_dependency 'activesupport'
|
27
28
|
end
|
data/lib/elasticsearch/git.rb
CHANGED
@@ -1,12 +1,9 @@
|
|
1
1
|
require "elasticsearch/git/version"
|
2
2
|
require "elasticsearch/git/model"
|
3
|
-
require "elasticsearch/git/
|
3
|
+
require "elasticsearch/git/repository"
|
4
4
|
|
5
5
|
module Elasticsearch
|
6
6
|
module Git
|
7
|
-
class Test
|
8
|
-
include Elasticsearch::Git::Model
|
9
|
-
end
|
10
7
|
end
|
11
8
|
end
|
12
9
|
|
@@ -12,7 +12,13 @@ module Elasticsearch
|
|
12
12
|
include ActiveModel::Model
|
13
13
|
include Elasticsearch::Model
|
14
14
|
|
15
|
-
|
15
|
+
# The index name embeds the Rails environment when Rails is loaded and the
# literal "undefined" otherwise, e.g. "repository-index-development".
env = defined?(::Rails) ? ::Rails.env.to_s : "undefined"

index_name "#{self.name.downcase}-index-#{env}"
|
16
22
|
|
17
23
|
settings \
|
18
24
|
index: {
|
@@ -3,7 +3,7 @@ require 'active_model'
|
|
3
3
|
require 'elasticsearch'
|
4
4
|
require 'elasticsearch/model'
|
5
5
|
require 'rugged'
|
6
|
-
require '
|
6
|
+
require 'linguist'
|
7
7
|
|
8
8
|
module Elasticsearch
|
9
9
|
module Git
|
@@ -14,14 +14,16 @@ module Elasticsearch
|
|
14
14
|
include Elasticsearch::Git::Model
|
15
15
|
|
16
16
|
mapping do
|
17
|
-
indexes :
|
17
|
+
indexes :blob do
|
18
18
|
indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
|
19
|
+
indexes :rid, type: :string, index: :not_analyzed
|
19
20
|
indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
|
20
21
|
indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
|
21
22
|
indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
|
22
23
|
end
|
23
|
-
indexes :
|
24
|
+
indexes :commit do
|
24
25
|
indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
|
26
|
+
indexes :rid, type: :string, index: :not_analyzed
|
25
27
|
indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
|
26
28
|
indexes :author do
|
27
29
|
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
|
@@ -51,23 +53,89 @@ module Elasticsearch
|
|
51
53
|
# }
|
52
54
|
#
|
53
55
|
# For search from blobs use type 'blob'
|
54
|
-
def index_blobs
|
55
|
-
|
56
|
-
|
56
|
+
# Indexes blob contents into Elasticsearch.
#
# from_rev - optional commit sha. When given, only the deltas of the diff
#            between from_rev and to_rev are processed: deleted files are
#            removed from the index, all other deltas are (re)indexed.
# to_rev   - optional commit sha stored as commit_sha on every indexed blob.
#            Defaults to the target of HEAD.
#
# Without from_rev the whole repository is indexed: by walking the tree of
# to_rev for bare repositories, or by iterating the repository index otherwise.
#
# Raises ArgumentError when a supplied revision does not resolve to a commit.
def index_blobs(from_rev: nil, to_rev: nil)
  # True only when the revision resolves to a commit object; any lookup
  # failure is treated the same as "not a commit".
  commit_rev = lambda do |rev|
    begin
      repository_for_indexing.lookup(rev).type == :commit
    rescue
      false
    end
  end

  if to_rev.present?
    raise ArgumentError, "'to_rev': '#{to_rev}' is a incorrect commit sha." unless commit_rev.call(to_rev)
  else
    to_rev = repository_for_indexing.head.target
  end

  target_sha = to_rev

  if from_rev.present?
    raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha." unless commit_rev.call(from_rev)

    changes = repository_for_indexing.diff(from_rev, to_rev).deltas.reverse
    changes.each do |delta|
      if delta.status == :deleted
        delete_from_index_blob(LiteBlob.new(repository_for_indexing, delta.old_file))
      else
        index_blob(LiteBlob.new(repository_for_indexing, delta.new_file), target_sha)
      end
    end
  else
    if repository_for_indexing.bare?
      root_tree = repository_for_indexing.lookup(target_sha).tree
      recurse_blobs_index(root_tree, target_sha)
    else
      repository_for_indexing.index.each do |entry|
        index_blob(LiteBlob.new(repository_for_indexing, entry), target_sha)
      end
    end
  end
end
|
98
|
+
|
99
|
+
# Index a bare repository by walking its tree recursively
|
100
|
+
# tree       - tree object whose blobs and nested trees are visited
# target_sha - commit sha passed through to index_blob for every blob
# path       - directory prefix of +tree+, either "" or ending in "/"
def recurse_blobs_index(tree, target_sha, path = "")
  tree.each_blob do |entry|
    # Record the path on the entry itself before wrapping it in a LiteBlob.
    entry[:path] = path + entry[:name]
    index_blob(LiteBlob.new(repository_for_indexing, entry), target_sha)
  end

  tree.each_tree do |subtree|
    nested = repository_for_indexing.lookup(subtree[:oid])
    recurse_blobs_index(nested, target_sha, "#{path}#{subtree[:name]}/")
  end
end
|
111
|
+
|
112
|
+
# Stores one blob as a document of type "repository", keyed by
# "<repository_id>_<path>". Blobs for which text? is false are skipped.
#
# blob       - blob wrapper responding to text?, id, path and data
# target_sha - commit sha saved alongside the content as commit_sha
def index_blob(blob, target_sha)
  return unless blob.text?

  document = {
    type: "blob",
    oid: blob.id,
    rid: repository_id,
    content: blob.data,
    commit_sha: target_sha
  }

  client_for_indexing.index(
    index: "#{self.class.index_name}",
    type: "repository",
    id: "#{repository_id}_#{blob.path}",
    body: { blob: document }
  )
end
|
129
|
+
|
130
|
+
# Removes the document for +blob+ from the index. Blobs for which text? is
# false are ignored (index_blob never stores them).
#
# Returns true when Elasticsearch reports the document as not found.
def delete_from_index_blob(blob)
  return unless blob.text?

  begin
    client_for_indexing.delete(
      index: "#{self.class.index_name}",
      type: "repository",
      id: "#{repository_id}_#{blob.path}"
    )
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    # A missing document is not an error for a delete.
    true
  end
end
|
@@ -94,27 +162,45 @@ module Elasticsearch
|
|
94
162
|
# }
|
95
163
|
#
|
96
164
|
# For search from commits use type 'commit'
|
97
|
-
def index_commits
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
165
|
+
# Indexes commits into Elasticsearch.
#
# from_rev - optional commit sha marking the exclusive start of the range
# to_rev   - optional commit sha marking the inclusive end of the range
#
# When both revisions are given, the commits reachable from to_rev but not
# from from_rev are indexed. Otherwise every commit object found in the
# repository is indexed.
#
# Raises ArgumentError when either supplied revision is not a commit.
def index_commits(from_rev: nil, to_rev: nil)
  if from_rev.present? && to_rev.present?
    # Validate both ends of the range and name the offending argument.
    { 'from_rev' => from_rev, 'to_rev' => to_rev }.each do |label, rev|
      begin
        raise unless repository_for_indexing.lookup(rev).type == :commit
      rescue
        raise ArgumentError, "'#{label}': '#{rev}' is a incorrect commit sha."
      end
    end

    # Rugged's Repository#walk takes (from, sorting), so it cannot express a
    # from..to range; a Walker with an explicit hide does.
    walker = Rugged::Walker.new(repository_for_indexing)
    walker.push(to_rev)
    walker.hide(from_rev)
    walker.each do |commit|
      index_commit(commit)
    end
  else
    repository_for_indexing.each_id do |oid|
      obj = repository_for_indexing.lookup(oid)
      index_commit(obj) if obj.type == :commit
    end
  end
end
|
117
186
|
|
187
|
+
# Stores one commit as a document of type "repository", keyed by
# "<repository_id>_<commit oid>".
#
# commit - commit object responding to oid, author, committer and message
def index_commit(commit)
  document = {
    type: "commit",
    rid: repository_id,
    sha: commit.oid,
    author: commit.author,
    committer: commit.committer,
    message: commit.message
  }

  client_for_indexing.index(
    index: "#{self.class.index_name}",
    type: "repository",
    id: "#{repository_id}_#{commit.oid}",
    body: { commit: document }
  )
end
|
203
|
+
|
118
204
|
# Representation of repository as indexed json
|
119
205
|
# Attention: It can be very very very huge hash
|
120
206
|
def as_indexed_json(options = {})
|
@@ -129,19 +215,52 @@ module Elasticsearch
|
|
129
215
|
result = []
|
130
216
|
|
131
217
|
target_sha = repository_for_indexing.head.target
|
132
|
-
|
133
|
-
|
218
|
+
|
219
|
+
if repository_for_indexing.bare?
|
220
|
+
tree = repository_for_indexing.lookup(target_sha).tree
|
221
|
+
result.push(recurse_blobs_index_hash(tree))
|
222
|
+
else
|
223
|
+
repository_for_indexing.index.each do |blob|
|
224
|
+
b = EasyBlob.new(repository_for_indexing, blob)
|
225
|
+
result.push(
|
226
|
+
{
|
227
|
+
type: 'blob',
|
228
|
+
id: "#{target_sha}_#{b.path}",
|
229
|
+
rid: repository_id,
|
230
|
+
oid: b.id,
|
231
|
+
content: b.data,
|
232
|
+
commit_sha: target_sha
|
233
|
+
}
|
234
|
+
) if b.text?
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
result
|
239
|
+
end
|
240
|
+
|
241
|
+
# Collects the indexable hash of every text blob below +tree+.
#
# tree - tree object responding to each_blob and each_tree
# path - directory prefix of +tree+ relative to the repository root,
#        either "" or ending in "/"
#
# Returns a flat Array of Hashes (type, id, rid, oid, content, commit_sha).
def recurse_blobs_index_hash(tree, path = "")
  # Resolve HEAD once instead of once (twice, in fact) per blob.
  head_sha = repository_for_indexing.head.target
  result = []

  tree.each_blob do |blob|
    blob[:path] = path + blob[:name]
    b = LiteBlob.new(repository_for_indexing, blob)
    next unless b.text?

    result.push(
      {
        type: 'blob',
        id: "#{head_sha}_#{blob[:path]}",
        rid: repository_id,
        oid: b.id,
        content: b.data,
        commit_sha: head_sha
      }
    )
  end

  tree.each_tree do |nested_tree|
    subtree = repository_for_indexing.lookup(nested_tree[:oid])
    # Keep the accumulated prefix so deeply nested files retain their full
    # path (previously only the immediate directory name was passed down).
    result.concat(recurse_blobs_index_hash(subtree, "#{path}#{nested_tree[:name]}/"))
  end

  result
end
|
146
265
|
|
147
266
|
# Lookup all object ids for commit objects
|
@@ -153,6 +272,7 @@ module Elasticsearch
|
|
153
272
|
if obj.type == :commit
|
154
273
|
res.push(
|
155
274
|
{
|
275
|
+
type: 'commit',
|
156
276
|
sha: obj.oid,
|
157
277
|
author: obj.author,
|
158
278
|
committer: obj.committer,
|
@@ -165,27 +285,141 @@ module Elasticsearch
|
|
165
285
|
res
|
166
286
|
end
|
167
287
|
|
288
|
+
# Searches within this repository by delegating to the class-level search.
#
# query   - search string
# type    - :all, :blob or :commit
# page    - 1-based page number
# per     - page size
# options - extra search options; :repository_id defaults to this
#           repository's id when absent or nil
#
# Returns whatever the class-level search returns.
def search(query, type: :all, page: 1, per: 20, options: {})
  # Build a new hash instead of writing into the caller's options.
  scoped_options = if options[:repository_id].nil?
                     options.merge(repository_id: repository_id)
                   else
                     options
                   end

  self.class.search(query, type: type, page: page, per: per, options: scoped_options)
end
|
292
|
+
|
168
293
|
# Repository id used to tell apart data that comes from different repositories
|
169
294
|
# Update this value if needed
|
170
|
-
def set_repository_id id
|
295
|
+
# id - value to store as the repository id; when omitted (or falsy) the
#      repository path from path_to_repo is stored instead.
def set_repository_id(id = nil)
  @repository_id = (id || path_to_repo)
end
|
173
298
|
|
299
|
+
# Returns the id stored by set_repository_id.
# Meant to be overridden by the including class when the id should come from
# somewhere else (the README example returns project.id).
def repository_id
  @repository_id
end
|
303
|
+
|
304
|
+
# NOTE(review): `defined?(path_to_repo)` is resolved against whatever `self`
# is while this code is being loaded. If that is a class or module body, it
# checks for a class-level method rather than an instance method - confirm
# the guard detects a host-provided path_to_repo as intended.
unless defined?(path_to_repo)
  # Returns the repository path stored in @path_to_repo.
  #
  # Raises NotImplementedError when no path has been set.
  def path_to_repo
    return @path_to_repo unless @path_to_repo.blank?

    raise NotImplementedError, 'Please, define "path_to_repo" method, or set "path_to_repo" via "repository_for_indexing" method'
  end
end
|
313
|
+
|
174
314
|
# Opens the repository to index.
#
# repo_path - path remembered in @path_to_repo the first time this is called;
#             later values are ignored once a path is stored.
#
# Every call invokes set_repository_id without an argument and builds a new
# Rugged::Repository for the stored path.
def repository_for_indexing(repo_path = "")
  @path_to_repo = repo_path unless @path_to_repo
  set_repository_id
  Rugged::Repository.new(@path_to_repo)
end
|
178
319
|
|
179
320
|
# Elasticsearch client used for index/delete calls; created on first use
# with `log: true` and memoized in @client_for_indexing.
def client_for_indexing
  @client_for_indexing ||= Elasticsearch::Client.new(log: true)
end
|
323
|
+
end
|
324
|
+
|
325
|
+
# Class-level search API.
module ClassMethods
  # Searches blobs and/or commits.
  #
  # query   - search string
  # type    - :all, :blob or :commit (String or Symbol); any other value
  #           leaves both result lists empty
  # page    - 1-based page number
  # per     - page size
  # options - :repository_id (single id or Array of ids) restricts the search,
  #           :highlight with :in (Array of field names) requests highlighting
  #
  # Returns a Hash { blobs: ..., commits: ... }; a list that was not requested
  # stays [].
  def search(query, type: :all, page: 1, per: 20, options: {})
    results = { blobs: [], commits: [] }

    case type.to_sym
    when :all
      results[:blobs] = search_blob(query, page: page, per: per, options: options)
      results[:commits] = search_commit(query, page: page, per: per, options: options)
    when :blob
      results[:blobs] = search_blob(query, page: page, per: per, options: options)
    when :commit
      results[:commits] = search_commit(query, page: page, per: per, options: options)
    end

    results
  end

  # Searches commit documents with a multi_match over the boosted commit
  # fields (all terms required). A blank query matches every commit.
  #
  # Returns the results of the Elasticsearch response.
  def search_commit(query, page: 1, per: 20, options: {})
    page ||= 1

    fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map { |i| "commit.#{i}" }

    query_hash = {
      query: {
        filtered: {
          query: {
            multi_match: {
              fields: fields,
              query: "#{query}",
              operator: :and
            }
          },
        },
      },
      size: per,
      from: per * (page - 1)
    }

    if query.blank?
      query_hash[:query][:filtered][:query] = { match_all: {} }
      query_hash[:track_scores] = true
    end

    if options[:repository_id]
      query_hash[:query][:filtered][:filter] ||= { and: [] }
      query_hash[:query][:filtered][:filter][:and] << {
        terms: {
          "commit.rid" => [options[:repository_id]].flatten
        }
      }
    end

    query_hash[:highlight] = highlight_fields_for(options) if options[:highlight]

    self.__elasticsearch__.search(query_hash).results
  end

  # Searches blob documents with a match query on blob.content (all terms
  # required). The +type+ keyword is accepted but not used.
  #
  # Returns the results of the Elasticsearch response.
  def search_blob(query, type: :all, page: 1, per: 20, options: {})
    page ||= 1

    query_hash = {
      query: {
        filtered: {
          query: {
            match: {
              'blob.content' => {
                query: "#{query}",
                operator: :and
              }
            }
          }
        }
      },
      size: per,
      from: per * (page - 1)
    }

    if options[:repository_id]
      query_hash[:query][:filtered][:filter] ||= { and: [] }
      query_hash[:query][:filtered][:filter][:and] << {
        terms: {
          "blob.rid" => [options[:repository_id]].flatten
        }
      }
    end

    query_hash[:highlight] = highlight_fields_for(options) if options[:highlight]

    self.__elasticsearch__.search(query_hash).results
  end

  private

  # Builds the highlight section from options[:in]: one empty settings hash
  # per field name. each_with_object keeps the accumulator (the previous
  # inject returned the assigned {} and so always produced an empty hash);
  # Array() tolerates a missing or scalar :in.
  def highlight_fields_for(options)
    fields = Array(options[:in]).each_with_object({}) do |field, acc|
      acc[field.to_sym] = {}
    end

    { fields: fields }
  end
end
|
184
419
|
end
|
185
420
|
|
186
421
|
class LiteBlob
|
187
422
|
include Linguist::BlobHelper
|
188
|
-
include EncodingHelper
|
189
423
|
|
190
424
|
attr_accessor :id, :name, :path, :data, :commit_id
|
191
425
|
|
@@ -195,6 +429,37 @@ module Elasticsearch
|
|
195
429
|
@name = @path.split("/").last
|
196
430
|
@data = encode!(repo.lookup(@id).content)
|
197
431
|
end
|
432
|
+
|
433
|
+
# Coerces +message+ to valid UTF-8 in place.
#
# Returns nil when +message+ does not respond to force_encoding, the message
# itself when it is already valid UTF-8 or is detected as binary (then
# tagged BINARY), otherwise the message with its content replaced by the
# cleaned transcoding. If anything raises along the way, a placeholder
# string naming the detected encoding is returned instead.
def encode!(message)
  return nil unless message.respond_to?(:force_encoding)

  # Already valid UTF-8: nothing to do.
  message.force_encoding("UTF-8")
  return message if message.valid_encoding?

  guess = CharlockHolmes::EncodingDetector.detect(message)
  if guess
    # Binary content is returned untouched apart from the encoding tag.
    return message.force_encoding("BINARY") if guess[:type] == :binary

    # Re-tag with the detected encoding so the transcoding below starts
    # from the right source encoding.
    message.force_encoding(guess[:encoding]) if guess[:encoding]
  end

  # Transcode and drop the characters that cannot be represented.
  message.replace(clean(message))
rescue
  encoding = guess ? guess[:encoding] : "unknown"
  "--broken encoding: #{encoding}"
end
|
455
|
+
|
456
|
+
private
|
457
|
+
|
458
|
+
# Returns a UTF-8 copy of +message+ with invalid or unconvertible characters
# and NUL characters removed.
#
# The round trip through UTF-16BE forces a real transcoding, so the
# :invalid / :undef replacements are applied.
def clean(message)
  message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
         .encode("UTF-8")
         .delete("\0")
end
|
198
463
|
end
|
199
464
|
end
|
200
465
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch-git
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey Kumanyaev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch-model
|
@@ -53,7 +53,21 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: charlock_holmes
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: gitlab-linguist
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
73
|
- - '>='
|