elasticsearch-git 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -1
- data/README.md +258 -2
- data/elasticsearch-git.gemspec +2 -1
- data/lib/elasticsearch/git.rb +1 -4
- data/lib/elasticsearch/git/model.rb +7 -1
- data/lib/elasticsearch/git/repository.rb +307 -42
- data/lib/elasticsearch/git/version.rb +1 -1
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 743ab16ca2c38bbe9ab23dc7447c5c41d578f415
|
4
|
+
data.tar.gz: 4fba8f5da92706100fa6b01e582cbcd16a83d5ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efe576c9de405aa4b22c89cfe21171d6de7ac5e686ed4207e73e085cf83d673b114dced51cc6ecb051b6f38aeba9227bf45619cd8f65b7a228101a703d250599
|
7
|
+
data.tar.gz: 2e18154c17713a627d3d4b9519f279348bcdccd0779819e7164f1d01e9c62092dc8c5c408b34782e2f46f6c8778a8c52fa75a3985537fcfb30baa36c5088cd66
|
data/Gemfile
CHANGED
@@ -9,7 +9,8 @@ gem 'rugged', git: 'git://github.com/libgit2/rugged.git', branch: 'development',
|
|
9
9
|
gem 'bundler'
|
10
10
|
gem 'rake'
|
11
11
|
gem 'pry'
|
12
|
-
gem '
|
12
|
+
gem 'charlock_holmes', '~> 0.6.9'
|
13
|
+
gem 'gitlab-linguist', '~> 3.0.0'
|
13
14
|
gem 'minitest'
|
14
15
|
gem 'activesupport', '> 4.0.0'
|
15
16
|
gem 'activemodel', '> 4.0.0'
|
data/README.md
CHANGED
@@ -30,9 +30,11 @@ $ gem install elasticsearch-git
|
|
30
30
|
class Repository
|
31
31
|
include Elasticsearch::Git::Repository
|
32
32
|
|
33
|
-
|
34
|
-
|
33
|
+
def repository_id
|
34
|
+
project.id
|
35
|
+
end
|
35
36
|
|
37
|
+
repository_for_indexing '/path/to/your/repo'
|
36
38
|
end
|
37
39
|
|
38
40
|
Repository.__elasticsearch__.create_index! force: true
|
@@ -41,6 +43,9 @@ repo = Repository.new
|
|
41
43
|
repo.index_commits
|
42
44
|
repo.index_blobs
|
43
45
|
|
46
|
+
repo.index_commits(from_rev: "1802bafa70d3b1678cfa46a482fd396dd8a4bd40", to_rev: "8d4175e9f4a36065b52fa752c1fd3594c82c0f28")
|
47
|
+
repo.index_blobs(from_rev: "1802bafa70d3b1678cfa46a482fd396dd8a4bd40", to_rev: "8d4175e9f4a36065b52fa752c1fd3594c82c0f28")
|
48
|
+
|
44
49
|
Repository.search("query", type: 'blob')
|
45
50
|
Repository.search("query", type: 'commit')
|
46
51
|
|
@@ -48,6 +53,257 @@ Repository.search("query", type: 'commit')
|
|
48
53
|
Repository.search("query")
|
49
54
|
```
|
50
55
|
|
56
|
+
## Integration with Gitlab
|
57
|
+
|
58
|
+
``` ruby
|
59
|
+
# app/elastic/repositories_search.rb
|
60
|
+
module RepositoriesSearch
|
61
|
+
extend ActiveSupport::Concern
|
62
|
+
|
63
|
+
included do
|
64
|
+
include Elasticsearch::Git::Repository
|
65
|
+
|
66
|
+
def repository_id
|
67
|
+
project.id
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
# app/models/repository.rb
|
74
|
+
class Repository
|
75
|
+
include RepositoriesSearch
|
76
|
+
#...
|
77
|
+
def project
|
78
|
+
@project ||= Project.find_with_namespace(@path_with_namespace)
|
79
|
+
end
|
80
|
+
#...
|
81
|
+
end
|
82
|
+
|
83
|
+
Project.last.repository.__elasticsearch__.create_index! force: true
|
84
|
+
Project.last.repository.index_commits
|
85
|
+
Project.last.repository.index_blobs
|
86
|
+
|
87
|
+
Repository.search("some_query")
|
88
|
+
# => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
|
89
|
+
|
90
|
+
Repository.search("some_query", type: :blob)
|
91
|
+
# => {blobs: [{}, {}, {}], commits: []}
|
92
|
+
|
93
|
+
Repository.search("some_query", type: :commit)
|
94
|
+
# => {blobs: [], commits: [{}, {}, {}]}
|
95
|
+
|
96
|
+
Repository.search("some_query", type: :commit, page: 2, per: 50)
|
97
|
+
# => ...
|
98
|
+
|
99
|
+
Repository.search("some_query", options: { repository_id: Project.last.id })
|
100
|
+
# => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
|
101
|
+
|
102
|
+
Repository.search("some_query", options: { repository_id: current_user.authorized_projects.ids })
|
103
|
+
# => {blobs: [{}, {}, {}], commits: [{}, {}, {}]}
|
104
|
+
|
105
|
+
Project.last.repository.search("Copyright")[:blobs].first
|
106
|
+
=> #<Elasticsearch::Model::Response::Result:0xbb84b3fc
|
107
|
+
@result=
|
108
|
+
{"_index"=>"repository-index-development",
|
109
|
+
"_type"=>"repository",
|
110
|
+
"_id"=>"4328_LICENSE.txt",
|
111
|
+
"_score"=>0.034848917,
|
112
|
+
"_source"=>
|
113
|
+
{"blob"=>
|
114
|
+
{"type"=>"blob",
|
115
|
+
"oid"=>"f99909cd4ecb6f2ad08f8e55aac3a9fcd86a2bd2",
|
116
|
+
"rid"=>4328,
|
117
|
+
"content"=>
|
118
|
+
"Copyright (c) 2014 Andrey Kumanyaev\n\nMIT
|
119
|
+
License\n\nPermission is hereby granted, free of charge, to any person
|
120
|
+
obtaining\na copy of this software and associated documentation files
|
121
|
+
(the\n\"Software\"), to deal in the Software without restriction,
|
122
|
+
including\nwithout limitation the rights to use, copy, modify, merge,
|
123
|
+
publish,\ndistribute, sublicense, and/or sell copies of the Software,
|
124
|
+
and to\npermit persons to whom the Software is furnished to do so,
|
125
|
+
subject to\nthe following conditions:\n\nThe above copyright notice and
|
126
|
+
this permission notice shall be\nincluded in all copies or substantial
|
127
|
+
portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT
|
128
|
+
WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
|
129
|
+
THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
|
130
|
+
AND\nNONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
131
|
+
BE\nLIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
132
|
+
ACTION\nOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
133
|
+
CONNECTION\nWITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
134
|
+
SOFTWARE.\n",
|
135
|
+
"commit_sha"=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"}}}>
|
136
|
+
```
|
137
|
+
|
138
|
+
## Examples
|
139
|
+
|
140
|
+
After integrating this gem into [Gitlab](https://github.com/gitlabhq/gitlabhq)
|
141
|
+
|
142
|
+
``` ruby
|
143
|
+
Repository.search("too")[:commits].first
|
144
|
+
=> #<Elasticsearch::Model::Response::Result:0xbb50dfdc
|
145
|
+
@result=
|
146
|
+
{"_index"=>"repository-index-development",
|
147
|
+
"_type"=>"repository",
|
148
|
+
"_id"=>"4328_1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
|
149
|
+
"_score"=>0.15873253,
|
150
|
+
"_source"=>
|
151
|
+
{"commit"=>
|
152
|
+
{"type"=>"commit",
|
153
|
+
"rid"=>4328,
|
154
|
+
"sha"=>"1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
|
155
|
+
"author"=>
|
156
|
+
{"name"=>"Andrey Kumanyaev",
|
157
|
+
"email"=>"me@zzet.org",
|
158
|
+
"time"=>"2014-02-16T02:24:23+04:00"},
|
159
|
+
"committer"=>
|
160
|
+
{"name"=>"Andrey Kumanyaev",
|
161
|
+
"email"=>"me@zzet.org",
|
162
|
+
"time"=>"2014-02-16T02:24:23+04:00"},
|
163
|
+
"message"=>"Save 2. Indexing work. Search too\n"}}}>
|
164
|
+
|
165
|
+
|
166
|
+
Project.last.repository.as_indexed_json
|
167
|
+
Project Load (1.7ms) SELECT "projects".* FROM "projects" ORDER BY "projects"."id" DESC LIMIT 1
|
168
|
+
Namespace Load (4.8ms) SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = $1 ORDER BY "namespaces"."id" ASC LIMIT 1 [["id", 3739]]
|
169
|
+
Namespace Load (0.9ms) SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."path" = 'zzet' LIMIT 1
|
170
|
+
Project Load (0.7ms) SELECT "projects".* FROM "projects" WHERE "projects"."namespace_id" = 3739 AND "projects"."path" = 'elasticsearch-git' LIMIT 1
|
171
|
+
# Long lines were truncated manually
|
172
|
+
=> {:blobs=>
|
173
|
+
[[{:type=>"blob",
|
174
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_.gitignore",
|
175
|
+
:rid=>4328,
|
176
|
+
:oid=>"d87d4be66f458acd52878902bbf1391732ad21e1",
|
177
|
+
:content=>
|
178
|
+
"*.gem\n*.rbc\n.bundle\n.config\n.yardoc\nGemfile.lock\nInstalledFiles\n_yardoc\ncoverage\ndoc/\nlib/bundler/man\npkg\nrdoc\nspec/reports\ntest/tmp\ntest/version_tmp\ntmp\n",
|
179
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
180
|
+
{:type=>"blob",
|
181
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_Gemfile",
|
182
|
+
:rid=>4328,
|
183
|
+
:oid=>"7322405f8f3ee5de24f7a727940ac52543e8954c",
|
184
|
+
:content=>
|
185
|
+
"source 'https://rubygems.org'\n\n# Specify your gem's dependencies in elasticsearch-git.gemspec\ngemspec\n\ngem 'elasticsearch-model', github: 'elasticsearch/elasticsearch-rails'\ngem 'elasticsearc....."
|
186
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
187
|
+
{:type=>"blob",
|
188
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_LICENSE.txt",
|
189
|
+
:rid=>4328,
|
190
|
+
:oid=>"f99909cd4ecb6f2ad08f8e55aac3a9fcd86a2bd2",
|
191
|
+
:content=>
|
192
|
+
"Copyright (c) 2014 Andrey Kumanyaev\n\nMIT License\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n\"Softw...."
|
193
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
194
|
+
{:type=>"blob",
|
195
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_README.md",
|
196
|
+
:rid=>4328,
|
197
|
+
:oid=>"8258d574dfc8040a5d003f06c6493e0033527f36",
|
198
|
+
:content=>
|
199
|
+
"# Elasticsearch::Git\n\nAttention: Pre-pre-pre beta code. Not production.\n\n[Elasticsearch](https://github.com/elasticsearch/elasticsearch-rails/tree/master/elasticsearch-model) integrations for g...."
|
200
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
201
|
+
{:type=>"blob",
|
202
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_Rakefile",
|
203
|
+
:rid=>4328,
|
204
|
+
:oid=>"29955274e0d42e164337c411ad9144e8ffd7e46e",
|
205
|
+
:content=>"require \"bundler/gem_tasks\"\n",
|
206
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
207
|
+
{:type=>"blob",
|
208
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_elasticsearch-git.gemspec",
|
209
|
+
:rid=>4328,
|
210
|
+
:oid=>"67762437568dda1bb98ec5eca8be7e4a5c8115a9",
|
211
|
+
:content=>
|
212
|
+
"# coding: utf-8\nlib = File.expand_path('../lib', __FILE__)\n$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)\nrequire 'elasticsearch/git/version'\n\nGem::Specification.new do |spec|\n spec..."
|
213
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
214
|
+
{:type=>"blob",
|
215
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_elasticsearch/git.rb",
|
216
|
+
:rid=>4328,
|
217
|
+
:oid=>"d3817ec58af1f44dfd18856bf54ef2bf607901a8",
|
218
|
+
:content=>
|
219
|
+
"require \"elasticsearch/git/version\"\nrequire \"elasticsearch/git/model\"\nrequire \"elasticsearch/git/commit\"\n\nmodule Elasticsearch\n module Git\n class Test\n include Elasticsearch::..."
|
220
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
221
|
+
{:type=>"blob",
|
222
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/model.rb",
|
223
|
+
:rid=>4328,
|
224
|
+
:oid=>"3dfbae747f25391779fbe012fe8cc4f38cc4651c",
|
225
|
+
:content=>
|
226
|
+
"require 'active_support/concern'\nrequire 'active_model'\nrequire 'elasticsearch/model'\n\nmodule Elasticsearch\n module Git\n module Model\n extend ActiveSupport::Concern\n\n include..."
|
227
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
228
|
+
{:type=>"blob",
|
229
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/repository.rb",
|
230
|
+
:rid=>4328,
|
231
|
+
:oid=>"70fe59c8391f6c27adb79c3e45824e6b4cf9566c",
|
232
|
+
:content=>
|
233
|
+
"require 'active_support/concern'\nrequire 'active_model'\nrequire 'elasticsearch'\nrequire 'elasticsearch/model'\nrequire 'rugged'\nrequire 'gitlab_git'\n\nmodule Elasticsearch\n module Git\n m..."
|
234
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
235
|
+
{:type=>"blob",
|
236
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_git/version.rb",
|
237
|
+
:rid=>4328,
|
238
|
+
:oid=>"79e8082b122492464732f1fb43e9f2bdc96ea146",
|
239
|
+
:content=>
|
240
|
+
"module Elasticsearch\n module Git\n VERSION = \"0.0.1\"\n end\nend\n",
|
241
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"},
|
242
|
+
{:type=>"blob",
|
243
|
+
:id=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28_test/test_helper.rb",
|
244
|
+
:rid=>4328,
|
245
|
+
:oid=>"6acc0d2b7bf0f286557d3757c1140b41ab57e8f7",
|
246
|
+
:content=>
|
247
|
+
"require \"rubygems\"\nrequire 'bundler/setup'\nrequire 'pry'\n\nBundler.require\n\nrequire 'wrong/adapters/minitest'\n\nPROJECT_ROOT = File.join(Dir.pwd)\n\nWrong.config.color\n\nMinitest.autorun\n..."
|
248
|
+
:commit_sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28"}]],
|
249
|
+
:commits=>
|
250
|
+
[{:type=>"commit",
|
251
|
+
:sha=>"8d4175e9f4a36065b52fa752c1fd3594c82c0f28",
|
252
|
+
:author=>
|
253
|
+
{:name=>"Andrey Kumanyaev",
|
254
|
+
:email=>"me@zzet.org",
|
255
|
+
:time=>2014-02-16 13:50:32 +0400},
|
256
|
+
:committer=>
|
257
|
+
{:name=>"Andrey Kumanyaev",
|
258
|
+
:email=>"me@zzet.org",
|
259
|
+
:time=>2014-02-16 13:50:32 +0400},
|
260
|
+
:message=>"Improve readme\n"},
|
261
|
+
{:type=>"commit",
|
262
|
+
:sha=>"37f1b0710eb7f41254ae0c33db09794a25bbb246",
|
263
|
+
:author=>
|
264
|
+
{:name=>"Andrey Kumanyaev",
|
265
|
+
:email=>"me@zzet.org",
|
266
|
+
:time=>2014-02-16 13:49:25 +0400},
|
267
|
+
:committer=>
|
268
|
+
{:name=>"Andrey Kumanyaev",
|
269
|
+
:email=>"me@zzet.org",
|
270
|
+
:time=>2014-02-16 13:49:25 +0400},
|
271
|
+
:message=>"prepare first test release\n"},
|
272
|
+
{:type=>"commit",
|
273
|
+
:sha=>"1802bafa70d3b1678cfa46a482fd396dd8a4bd40",
|
274
|
+
:author=>
|
275
|
+
{:name=>"Andrey Kumanyaev",
|
276
|
+
:email=>"me@zzet.org",
|
277
|
+
:time=>2014-02-16 02:24:23 +0400},
|
278
|
+
:committer=>
|
279
|
+
{:name=>"Andrey Kumanyaev",
|
280
|
+
:email=>"me@zzet.org",
|
281
|
+
:time=>2014-02-16 02:24:23 +0400},
|
282
|
+
:message=>"Save 2. Indexing work. Search too\n"},
|
283
|
+
{:type=>"commit",
|
284
|
+
:sha=>"3ed383bfbf6cba611d191dbc3590779c0444b7f0",
|
285
|
+
:author=>
|
286
|
+
{:name=>"Andrey Kumanyaev",
|
287
|
+
:email=>"me@zzet.org",
|
288
|
+
:time=>2014-02-16 00:23:10 +0400},
|
289
|
+
:committer=>
|
290
|
+
{:name=>"Andrey Kumanyaev",
|
291
|
+
:email=>"me@zzet.org",
|
292
|
+
:time=>2014-02-16 00:23:10 +0400},
|
293
|
+
:message=>"Save commit\n"},
|
294
|
+
{:type=>"commit",
|
295
|
+
:sha=>"7021addf520a19bdeceef29947c8687965c132ff",
|
296
|
+
:author=>
|
297
|
+
{:name=>"Andrey Kumanyaev",
|
298
|
+
:email=>"me@zzet.org",
|
299
|
+
:time=>2014-02-15 14:28:43 +0400},
|
300
|
+
:committer=>
|
301
|
+
{:name=>"Andrey Kumanyaev",
|
302
|
+
:email=>"me@zzet.org",
|
303
|
+
:time=>2014-02-15 14:28:43 +0400},
|
304
|
+
:message=>"first commit\n"}]}
|
305
|
+
```
|
306
|
+
|
51
307
|
## Contributing
|
52
308
|
|
53
309
|
1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
|
data/elasticsearch-git.gemspec
CHANGED
@@ -21,7 +21,8 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_runtime_dependency 'elasticsearch-model'
|
22
22
|
spec.add_runtime_dependency 'elasticsearch-api'
|
23
23
|
spec.add_runtime_dependency 'rugged'
|
24
|
-
spec.add_runtime_dependency '
|
24
|
+
spec.add_runtime_dependency 'charlock_holmes'
|
25
|
+
spec.add_runtime_dependency 'gitlab-linguist'
|
25
26
|
spec.add_runtime_dependency 'activemodel'
|
26
27
|
spec.add_runtime_dependency 'activesupport'
|
27
28
|
end
|
data/lib/elasticsearch/git.rb
CHANGED
@@ -1,12 +1,9 @@
|
|
1
1
|
require "elasticsearch/git/version"
|
2
2
|
require "elasticsearch/git/model"
|
3
|
-
require "elasticsearch/git/
|
3
|
+
require "elasticsearch/git/repository"
|
4
4
|
|
5
5
|
module Elasticsearch
|
6
6
|
module Git
|
7
|
-
class Test
|
8
|
-
include Elasticsearch::Git::Model
|
9
|
-
end
|
10
7
|
end
|
11
8
|
end
|
12
9
|
|
@@ -12,7 +12,13 @@ module Elasticsearch
|
|
12
12
|
include ActiveModel::Model
|
13
13
|
include Elasticsearch::Model
|
14
14
|
|
15
|
-
|
15
|
+
env = if defined?(::Rails)
|
16
|
+
::Rails.env.to_s
|
17
|
+
else
|
18
|
+
"undefined"
|
19
|
+
end
|
20
|
+
|
21
|
+
index_name [self.name.downcase, 'index', env].join('-')
|
16
22
|
|
17
23
|
settings \
|
18
24
|
index: {
|
@@ -3,7 +3,7 @@ require 'active_model'
|
|
3
3
|
require 'elasticsearch'
|
4
4
|
require 'elasticsearch/model'
|
5
5
|
require 'rugged'
|
6
|
-
require '
|
6
|
+
require 'linguist'
|
7
7
|
|
8
8
|
module Elasticsearch
|
9
9
|
module Git
|
@@ -14,14 +14,16 @@ module Elasticsearch
|
|
14
14
|
include Elasticsearch::Git::Model
|
15
15
|
|
16
16
|
mapping do
|
17
|
-
indexes :
|
17
|
+
indexes :blob do
|
18
18
|
indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
|
19
|
+
indexes :rid, type: :string, index: :not_analyzed
|
19
20
|
indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
|
20
21
|
indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
|
21
22
|
indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
|
22
23
|
end
|
23
|
-
indexes :
|
24
|
+
indexes :commit do
|
24
25
|
indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
|
26
|
+
indexes :rid, type: :string, index: :not_analyzed
|
25
27
|
indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
|
26
28
|
indexes :author do
|
27
29
|
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
|
@@ -51,23 +53,89 @@ module Elasticsearch
|
|
51
53
|
# }
|
52
54
|
#
|
53
55
|
# For search from blobs use type 'blob'
|
54
|
-
def index_blobs
|
55
|
-
|
56
|
-
|
56
|
+
def index_blobs(from_rev: nil, to_rev: nil)
|
57
|
+
|
58
|
+
if to_rev.present?
|
59
|
+
begin
|
60
|
+
raise unless repository_for_indexing.lookup(to_rev).type == :commit
|
61
|
+
rescue
|
62
|
+
raise ArgumentError, "'to_rev': '#{to_rev}' is a incorrect commit sha."
|
63
|
+
end
|
64
|
+
else
|
65
|
+
to_rev = repository_for_indexing.head.target
|
66
|
+
end
|
67
|
+
|
68
|
+
target_sha = to_rev
|
69
|
+
|
70
|
+
if from_rev.present?
|
71
|
+
begin
|
72
|
+
raise unless repository_for_indexing.lookup(from_rev).type == :commit
|
73
|
+
rescue
|
74
|
+
raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha."
|
75
|
+
end
|
76
|
+
|
77
|
+
diff = repository_for_indexing.diff(from_rev, to_rev)
|
78
|
+
diff.deltas.reverse.each do |delta|
|
79
|
+
if delta.status == :deleted
|
80
|
+
b = LiteBlob.new(repository_for_indexing, delta.old_file)
|
81
|
+
delete_from_index_blob(b)
|
82
|
+
else
|
83
|
+
b = LiteBlob.new(repository_for_indexing, delta.new_file)
|
84
|
+
index_blob(b, target_sha)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
else
|
88
|
+
if repository_for_indexing.bare?
|
89
|
+
recurse_blobs_index(repository_for_indexing.lookup(target_sha).tree, target_sha)
|
90
|
+
else
|
91
|
+
repository_for_indexing.index.each do |blob|
|
92
|
+
b = LiteBlob.new(repository_for_indexing, blob)
|
93
|
+
index_blob(b, target_sha)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# Index a bare repository by walking through its tree
|
100
|
+
def recurse_blobs_index(tree, target_sha, path = "")
|
101
|
+
tree.each_blob do |blob|
|
102
|
+
blob[:path] = path + blob[:name]
|
57
103
|
b = LiteBlob.new(repository_for_indexing, blob)
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
104
|
+
index_blob(b, target_sha)
|
105
|
+
end
|
106
|
+
|
107
|
+
tree.each_tree do |nested_tree|
|
108
|
+
recurse_blobs_index(repository_for_indexing.lookup(nested_tree[:oid]), target_sha, "#{path}#{nested_tree[:name]}/")
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def index_blob(blob, target_sha)
|
113
|
+
if blob.text?
|
114
|
+
client_for_indexing.index \
|
115
|
+
index: "#{self.class.index_name}",
|
116
|
+
type: "repository",
|
117
|
+
id: "#{repository_id}_#{blob.path}",
|
118
|
+
body: {
|
119
|
+
blob: {
|
120
|
+
type: "blob",
|
121
|
+
oid: blob.id,
|
122
|
+
rid: repository_id,
|
123
|
+
content: blob.data,
|
124
|
+
commit_sha: target_sha
|
70
125
|
}
|
126
|
+
}
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def delete_from_index_blob(blob)
|
131
|
+
if blob.text?
|
132
|
+
begin
|
133
|
+
client_for_indexing.delete \
|
134
|
+
index: "#{self.class.index_name}",
|
135
|
+
type: "repository",
|
136
|
+
id: "#{repository_id}_#{blob.path}"
|
137
|
+
rescue Elasticsearch::Transport::Transport::Errors::NotFound
|
138
|
+
return true
|
71
139
|
end
|
72
140
|
end
|
73
141
|
end
|
@@ -94,27 +162,45 @@ module Elasticsearch
|
|
94
162
|
# }
|
95
163
|
#
|
96
164
|
# For search from commits use type 'commit'
|
97
|
-
def index_commits
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
165
|
+
def index_commits(from_rev: nil, to_rev: nil)
|
166
|
+
if from_rev.present? && to_rev.present?
|
167
|
+
begin
|
168
|
+
raise unless repository_for_indexing.lookup(from_rev).type == :commit
|
169
|
+
raise unless repository_for_indexing.lookup(from_rev).type == :commit
|
170
|
+
rescue
|
171
|
+
raise ArgumentError, "'from_rev': '#{from_rev}' is a incorrect commit sha."
|
172
|
+
end
|
173
|
+
|
174
|
+
repository_for_indexing.walk(from_rev, to_rev).each do |commit|
|
175
|
+
index_commit(commit)
|
176
|
+
end
|
177
|
+
else
|
178
|
+
repository_for_indexing.each_id do |oid|
|
179
|
+
obj = repository_for_indexing.lookup(oid)
|
180
|
+
if obj.type == :commit
|
181
|
+
index_commit(obj)
|
182
|
+
end
|
114
183
|
end
|
115
184
|
end
|
116
185
|
end
|
117
186
|
|
187
|
+
def index_commit(commit)
|
188
|
+
client_for_indexing.index \
|
189
|
+
index: "#{self.class.index_name}",
|
190
|
+
type: "repository",
|
191
|
+
id: "#{repository_id}_#{commit.oid}",
|
192
|
+
body: {
|
193
|
+
commit: {
|
194
|
+
type: "commit",
|
195
|
+
rid: repository_id,
|
196
|
+
sha: commit.oid,
|
197
|
+
author: commit.author,
|
198
|
+
committer: commit.committer,
|
199
|
+
message: commit.message
|
200
|
+
}
|
201
|
+
}
|
202
|
+
end
|
203
|
+
|
118
204
|
# Representation of repository as indexed json
|
119
205
|
# Attention: It can be very very very huge hash
|
120
206
|
def as_indexed_json(options = {})
|
@@ -129,19 +215,52 @@ module Elasticsearch
|
|
129
215
|
result = []
|
130
216
|
|
131
217
|
target_sha = repository_for_indexing.head.target
|
132
|
-
|
133
|
-
|
218
|
+
|
219
|
+
if repository_for_indexing.bare?
|
220
|
+
tree = repository_for_indexing.lookup(target_sha).tree
|
221
|
+
result.push(recurse_blobs_index_hash(tree))
|
222
|
+
else
|
223
|
+
repository_for_indexing.index.each do |blob|
|
224
|
+
b = EasyBlob.new(repository_for_indexing, blob)
|
225
|
+
result.push(
|
226
|
+
{
|
227
|
+
type: 'blob',
|
228
|
+
id: "#{target_sha}_#{b.path}",
|
229
|
+
rid: repository_id,
|
230
|
+
oid: b.id,
|
231
|
+
content: b.data,
|
232
|
+
commit_sha: target_sha
|
233
|
+
}
|
234
|
+
) if b.text?
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
result
|
239
|
+
end
|
240
|
+
|
241
|
+
def recurse_blobs_index_hash(tree, path = "")
|
242
|
+
result = []
|
243
|
+
|
244
|
+
tree.each_blob do |blob|
|
245
|
+
blob[:path] = path + blob[:name]
|
246
|
+
b = LiteBlob.new(repository_for_indexing, blob)
|
134
247
|
result.push(
|
135
248
|
{
|
136
|
-
|
249
|
+
type: 'blob',
|
250
|
+
id: "#{repository_for_indexing.head.target}_#{path}#{blob[:name]}",
|
251
|
+
rid: repository_id,
|
137
252
|
oid: b.id,
|
138
253
|
content: b.data,
|
139
|
-
commit_sha:
|
254
|
+
commit_sha: repository_for_indexing.head.target
|
140
255
|
}
|
141
256
|
) if b.text?
|
142
257
|
end
|
143
258
|
|
144
|
-
|
259
|
+
tree.each_tree do |nested_tree|
|
260
|
+
result.push(recurse_blobs_index_hash(repository_for_indexing.lookup(nested_tree[:oid]), "#{nested_tree[:name]}/"))
|
261
|
+
end
|
262
|
+
|
263
|
+
result.flatten
|
145
264
|
end
|
146
265
|
|
147
266
|
# Lookup all object ids for commit objects
|
@@ -153,6 +272,7 @@ module Elasticsearch
|
|
153
272
|
if obj.type == :commit
|
154
273
|
res.push(
|
155
274
|
{
|
275
|
+
type: 'commit',
|
156
276
|
sha: obj.oid,
|
157
277
|
author: obj.author,
|
158
278
|
committer: obj.committer,
|
@@ -165,27 +285,141 @@ module Elasticsearch
|
|
165
285
|
res
|
166
286
|
end
|
167
287
|
|
288
|
+
def search(query, type: :all, page: 1, per: 20, options: {})
|
289
|
+
options[:repository_id] = repository_id if options[:repository_id].nil?
|
290
|
+
self.class.search(query, type: type, page: page, per: per, options: options)
|
291
|
+
end
|
292
|
+
|
168
293
|
# Repository id used to distinguish data from different repositories
|
169
294
|
# Update this value if needed
|
170
|
-
def set_repository_id id
|
295
|
+
def set_repository_id id = nil
|
171
296
|
@repository_id = id || path_to_repo
|
172
297
|
end
|
173
298
|
|
299
|
+
# Override this method to supply a custom repository id
|
300
|
+
def repository_id
|
301
|
+
@repository_id
|
302
|
+
end
|
303
|
+
|
304
|
+
unless defined?(path_to_repo)
|
305
|
+
def path_to_repo
|
306
|
+
if @path_to_repo.blank?
|
307
|
+
raise NotImplementedError, 'Please, define "path_to_repo" method, or set "path_to_repo" via "repository_for_indexing" method'
|
308
|
+
else
|
309
|
+
@path_to_repo
|
310
|
+
end
|
311
|
+
end
|
312
|
+
end
|
313
|
+
|
174
314
|
def repository_for_indexing(repo_path = "")
|
175
315
|
@path_to_repo ||= repo_path
|
316
|
+
set_repository_id
|
176
317
|
Rugged::Repository.new(@path_to_repo)
|
177
318
|
end
|
178
319
|
|
179
320
|
def client_for_indexing
|
180
321
|
@client_for_indexing ||= Elasticsearch::Client.new log: true
|
181
322
|
end
|
323
|
+
end
|
324
|
+
|
325
|
+
module ClassMethods
|
326
|
+
def search(query, type: :all, page: 1, per: 20, options: {})
|
327
|
+
results = { blobs: [], commits: []}
|
328
|
+
case type.to_sym
|
329
|
+
when :all
|
330
|
+
results[:blobs] = search_blob(query, page: page, per: per, options: options)
|
331
|
+
results[:commits] = search_commit(query, page: page, per: per, options: options)
|
332
|
+
when :blob
|
333
|
+
results[:blobs] = search_blob(query, page: page, per: per, options: options)
|
334
|
+
when :commit
|
335
|
+
results[:commits] = search_commit(query, page: page, per: per, options: options)
|
336
|
+
end
|
337
|
+
|
338
|
+
results
|
339
|
+
end
|
340
|
+
|
341
|
+
def search_commit(query, page: 1, per: 20, options: {})
|
342
|
+
page ||= 1
|
343
|
+
|
344
|
+
fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map {|i| "commit.#{i}"}
|
345
|
+
|
346
|
+
query_hash = {
|
347
|
+
query: {
|
348
|
+
filtered: {
|
349
|
+
query: {
|
350
|
+
multi_match: {
|
351
|
+
fields: fields,
|
352
|
+
query: "#{query}",
|
353
|
+
operator: :and
|
354
|
+
}
|
355
|
+
},
|
356
|
+
},
|
357
|
+
},
|
358
|
+
size: per,
|
359
|
+
from: per * (page - 1)
|
360
|
+
}
|
361
|
+
|
362
|
+
if query.blank?
|
363
|
+
query_hash[:query][:filtered][:query] = { match_all: {}}
|
364
|
+
query_hash[:track_scores] = true
|
365
|
+
end
|
366
|
+
|
367
|
+
if options[:repository_id]
|
368
|
+
query_hash[:query][:filtered][:filter] ||= { and: [] }
|
369
|
+
query_hash[:query][:filtered][:filter][:and] << {
|
370
|
+
terms: {
|
371
|
+
"commit.rid" => [options[:repository_id]].flatten
|
372
|
+
}
|
373
|
+
}
|
374
|
+
end
|
375
|
+
|
376
|
+
if options[:highlight]
|
377
|
+
query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
|
378
|
+
end
|
379
|
+
|
380
|
+
self.__elasticsearch__.search(query_hash).results
|
381
|
+
end
|
182
382
|
|
383
|
+
def search_blob(query, type: :all, page: 1, per: 20, options: {})
|
384
|
+
page ||= 1
|
385
|
+
|
386
|
+
query_hash = {
|
387
|
+
query: {
|
388
|
+
filtered: {
|
389
|
+
query: {
|
390
|
+
match: {
|
391
|
+
'blob.content' => {
|
392
|
+
query: "#{query}",
|
393
|
+
operator: :and
|
394
|
+
}
|
395
|
+
}
|
396
|
+
}
|
397
|
+
}
|
398
|
+
},
|
399
|
+
size: per,
|
400
|
+
from: per * (page - 1)
|
401
|
+
}
|
402
|
+
|
403
|
+
if options[:repository_id]
|
404
|
+
query_hash[:query][:filtered][:filter] ||= { and: [] }
|
405
|
+
query_hash[:query][:filtered][:filter][:and] << {
|
406
|
+
terms: {
|
407
|
+
"blob.rid" => [options[:repository_id]].flatten
|
408
|
+
}
|
409
|
+
}
|
410
|
+
end
|
411
|
+
|
412
|
+
if options[:highlight]
|
413
|
+
query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
|
414
|
+
end
|
415
|
+
|
416
|
+
self.__elasticsearch__.search(query_hash).results
|
417
|
+
end
|
183
418
|
end
|
184
419
|
end
|
185
420
|
|
186
421
|
class LiteBlob
|
187
422
|
include Linguist::BlobHelper
|
188
|
-
include EncodingHelper
|
189
423
|
|
190
424
|
attr_accessor :id, :name, :path, :data, :commit_id
|
191
425
|
|
@@ -195,6 +429,37 @@ module Elasticsearch
|
|
195
429
|
@name = @path.split("/").last
|
196
430
|
@data = encode!(repo.lookup(@id).content)
|
197
431
|
end
|
432
|
+
|
433
|
+
def encode!(message)
|
434
|
+
return nil unless message.respond_to? :force_encoding
|
435
|
+
|
436
|
+
# if message is utf-8 encoding, just return it
|
437
|
+
message.force_encoding("UTF-8")
|
438
|
+
return message if message.valid_encoding?
|
439
|
+
|
440
|
+
# return message if message type is binary
|
441
|
+
detect = CharlockHolmes::EncodingDetector.detect(message)
|
442
|
+
return message.force_encoding("BINARY") if detect && detect[:type] == :binary
|
443
|
+
|
444
|
+
# encoding message to detect encoding
|
445
|
+
if detect && detect[:encoding]
|
446
|
+
message.force_encoding(detect[:encoding])
|
447
|
+
end
|
448
|
+
|
449
|
+
# encode and clean the bad chars
|
450
|
+
message.replace clean(message)
|
451
|
+
rescue
|
452
|
+
encoding = detect ? detect[:encoding] : "unknown"
|
453
|
+
"--broken encoding: #{encoding}"
|
454
|
+
end
|
455
|
+
|
456
|
+
private
|
457
|
+
|
458
|
+
def clean(message)
|
459
|
+
message.encode("UTF-16BE", :undef => :replace, :invalid => :replace, :replace => "")
|
460
|
+
.encode("UTF-8")
|
461
|
+
.gsub("\0".encode("UTF-8"), "")
|
462
|
+
end
|
198
463
|
end
|
199
464
|
end
|
200
465
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch-git
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey Kumanyaev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch-model
|
@@ -53,7 +53,21 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: charlock_holmes
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: gitlab-linguist
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
73
|
- - '>='
|