elasticsearch-git 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1a7d0d063702f3bcc2f71f5a8ad9b55ec0377280
4
+ data.tar.gz: 656199334b4345491eddb15fd3f4e3ffd0a0b597
5
+ SHA512:
6
+ metadata.gz: bf49c347091bb8866356b74bed98b9652b44474d7e5e5341257f06bf759a0eaf48efdf46ccdafa4211a2a7d3e98d390f33ff3b59900a3f7bc737c1020b25be75
7
+ data.tar.gz: 99870c02c0532ed6f72b70ee7e962c778148d7bb794a73f20eebd0cafa74e859baf1fbc1d3443fcb90b87280d06018f40f43d8299651a7413985588b218f2c6e
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
source 'https://rubygems.org'

# Specify your gem's dependencies in elasticsearch-git.gemspec
gemspec

# Unreleased upstream sources. They are fetched over HTTPS: the git://
# transport is unauthenticated and unencrypted, and older Bundler versions
# expand the `github:` shorthand to git:// as well, so the URLs are explicit.
gem 'elasticsearch-model', git: 'https://github.com/elasticsearch/elasticsearch-rails.git'
gem 'elasticsearch-api', git: 'https://github.com/elasticsearch/elasticsearch-ruby.git'
gem 'rugged', git: 'https://github.com/libgit2/rugged.git', branch: 'development', submodules: true

gem 'bundler'
gem 'rake'
gem 'pry'
gem 'gitlab_git'
gem 'minitest'
gem 'activesupport', '> 4.0.0'
gem 'activemodel', '> 4.0.0'
gem 'wrong'
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Andrey Kumanyaev
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,57 @@
1
+ # Elasticsearch::Git
2
+
3
+ Attention: this is pre-beta code and is not ready for production use.
4
+
5
+ [Elasticsearch](https://github.com/elasticsearch/elasticsearch-rails/tree/master/elasticsearch-model) integrations for git repositories
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ``` ruby
12
+ gem 'elasticsearch-git'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ ``` bash
18
+ $ bundle
19
+ ```
20
+
21
+ Or install it yourself as:
22
+
23
+ ``` bash
24
+ $ gem install elasticsearch-git
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ``` ruby
30
+ class Repository
31
+ include Elasticsearch::Git::Repository
32
+
33
+ set_repository_id project.id
34
+ repository_for_indexing '/path/to/your/repo'
35
+
36
+ end
37
+
38
+ Repository.__elasticsearch__.create_index! force: true
39
+
40
+ repo = Repository.new
41
+ repo.index_commits
42
+ repo.index_blobs
43
+
44
+ Repository.search("query", type: 'blob')
45
+ Repository.search("query", type: 'commit')
46
+
47
+ # Search in all types
48
+ Repository.search("query")
49
+ ```
50
+
51
+ ## Contributing
52
+
53
+ 1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
54
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
55
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
56
+ 4. Push to the branch (`git push origin my-new-feature`)
57
+ 5. Create a new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,27 @@
1
# coding: utf-8
# Make lib/ loadable so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'elasticsearch/git/version'

# Gem specification for elasticsearch-git.
Gem::Specification.new do |gem|
  gem.name        = "elasticsearch-git"
  gem.version     = Elasticsearch::Git::VERSION
  gem.authors     = ["Andrey Kumanyaev"]
  gem.email       = ["me@zzet.org"]
  gem.summary     = "Elasticsearch integrations for git repositories."
  gem.description = "Elasticsearch integrations for git repositories."
  gem.homepage    = "https://github.com/zzet/elasticsearch-git"
  gem.license     = "MIT"

  # Package every file tracked by git (NUL-separated listing).
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies, declared in the same order as before.
  %w(
    elasticsearch-model
    elasticsearch-api
    rugged
    gitlab_git
    activemodel
    activesupport
  ).each do |dependency|
    gem.add_runtime_dependency dependency
  end
end
@@ -0,0 +1,12 @@
1
require "elasticsearch/git/version"
require "elasticsearch/git/model"
# The gem ships lib/elasticsearch/git/repository.rb; the formerly required
# "elasticsearch/git/commit" is not part of the packaged files, so loading
# this entry point raised LoadError and the repository mixin was never loaded.
require "elasticsearch/git/repository"

module Elasticsearch
  module Git
    # Minimal class that mixes in Elasticsearch::Git::Model.
    class Test
      include Elasticsearch::Git::Model
    end
  end
end
12
+
@@ -0,0 +1,68 @@
1
+ require 'active_support/concern'
2
+ require 'active_model'
3
+ require 'elasticsearch/model'
4
+
5
module Elasticsearch
  module Git
    # Concern that turns the including class into an Elasticsearch-backed
    # model: it mixes in ActiveModel and Elasticsearch::Model, derives the
    # index name from the class name and the current environment, and
    # declares the analyzers, tokenizers and token filters of the index.
    module Model
      extend ActiveSupport::Concern

      included do
        extend ActiveModel::Naming
        include ActiveModel::Model
        include Elasticsearch::Model

        # Rails is not a dependency of this gem, so referencing Rails.env
        # unconditionally raised NameError outside a Rails application.
        # Fall back to the usual environment variables in that case.
        environment =
          if defined?(::Rails) && ::Rails.respond_to?(:env)
            ::Rails.env.to_s
          else
            ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
          end

        # e.g. "<downcased class name>-index-<environment>"
        index_name [name.downcase, 'index', environment].join('-')

        settings index: {
          analysis: {
            analyzer: {
              # n-gram analysis of free text
              human_analyzer: {
                type: 'custom',
                tokenizer: 'human_tokenizer',
                filter: %w(lowercase asciifolding human_ngrams)
              },
              # n-gram analysis sized for 8..40 character SHA prefixes
              sha_analyzer: {
                type: 'custom',
                tokenizer: 'sha_tokenizer',
                filter: %w(lowercase asciifolding sha_ngrams)
              },
              # plain standard tokenization, no n-grams
              code_analyzer: {
                type: 'custom',
                tokenizer: 'standard',
                filter: %w(lowercase asciifolding)
              }
            },
            tokenizer: {
              sha_tokenizer: {
                type: "NGram",
                min_gram: 8,
                max_gram: 40,
                token_chars: %w(letter digit)
              },
              human_tokenizer: {
                type: "NGram",
                min_gram: 1,
                max_gram: 20,
                token_chars: %w(letter digit)
              }
            },
            filter: {
              human_ngrams: {
                type: "NGram",
                min_gram: 1,
                max_gram: 20
              },
              sha_ngrams: {
                type: "NGram",
                min_gram: 8,
                max_gram: 40
              }
            }
          }
        }
      end
    end
  end
end
@@ -0,0 +1,200 @@
1
+ require 'active_support/concern'
2
+ require 'active_model'
3
+ require 'elasticsearch'
4
+ require 'elasticsearch/model'
5
+ require 'rugged'
6
+ require 'gitlab_git'
7
+
8
module Elasticsearch
  module Git
    # Concern that indexes the blobs and commits of a git repository
    # (opened through Rugged) into Elasticsearch.
    module Repository
      extend ActiveSupport::Concern

      included do
        include Elasticsearch::Git::Model

        mapping do
          indexes :blobs do
            indexes :id,          type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
            indexes :oid,         type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer,   index_analyzer: :sha_analyzer
            indexes :commit_sha,  type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer,   index_analyzer: :sha_analyzer
            indexes :content,     type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,  index_analyzer: :human_analyzer
          end
          indexes :commits do
            indexes :id,          type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
            indexes :sha,         type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer,   index_analyzer: :sha_analyzer
            indexes :author do
              indexes :name,      type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,  index_analyzer: :human_analyzer
              indexes :email,     type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,  index_analyzer: :human_analyzer
              indexes :time,      type: :date
            end
            # Spelled "committer" to match the key the indexing methods
            # below emit; it was previously declared as "commiter".
            indexes :committer do
              indexes :name,      type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,  index_analyzer: :human_analyzer
              indexes :email,     type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,  index_analyzer: :human_analyzer
              indexes :time,      type: :date
            end
            indexes :message,     type: :string, index_options: 'offsets', search_analyzer: :code_analyzer,  index_analyzer: :human_analyzer
          end
        end

        # Index every text-like blob of the repository's current index.
        #
        # All documents go into one shared index; the 'rid' field identifies
        # the repository and may hold e.g. a project id.
        #
        # blob {
        #   oid        - blob id in the repository
        #   rid        - repository id
        #   content    - blob content
        #   commit_sha - target of HEAD at indexing time
        # }
        #
        # The document id is "<repository_id>_<blob path>".
        # To search blobs use type 'blob'.
        def index_blobs
          # Open the repository once instead of once per blob.
          repo = repository_for_indexing
          target_sha = repo.head.target

          repo.index.each do |entry|
            blob = LiteBlob.new(repo, entry)
            next unless blob.text?

            client_for_indexing.index(
              index: "#{self.class.index_name}",
              type: "blob",
              id: "#{repository_id}_#{blob.path}",
              body: {
                blob: {
                  oid: blob.id,
                  rid: repository_id,
                  content: blob.data,
                  commit_sha: target_sha
                }
              }
            )
          end
        end

        # Index every commit object found in the repository.
        #
        # All documents go into one shared index; the 'rid' field identifies
        # the repository and may hold e.g. a project id.
        #
        # commit {
        #   rid       - repository id
        #   sha       - commit sha
        #   author    - value of the commit's `author`
        #   committer - value of the commit's `committer`
        #   message   - commit message
        # }
        #
        # The document id is "<repository_id>_<commit sha>".
        # To search commits use type 'commit'.
        def index_commits
          # Open the repository once instead of once per object.
          repo = repository_for_indexing

          repo.each_id do |oid|
            obj = repo.lookup(oid)
            next unless obj.type == :commit

            client_for_indexing.index(
              index: "#{self.class.index_name}",
              type: "commit",
              id: "#{repository_id}_#{obj.oid}",
              body: {
                commit: {
                  rid: repository_id,
                  sha: obj.oid,
                  author: obj.author,
                  committer: obj.committer,
                  message: obj.message
                }
              }
            )
          end
        end

        # Whole repository as one hash with :blobs and :commits arrays.
        # Attention: for a real repository this hash can be very large.
        def as_indexed_json(options = {})
          {
            blobs: index_blobs_array,
            commits: index_commits_array
          }
        end

        # Array of hashes (id, oid, content, commit_sha), one per text-like
        # blob of the repository's current index.
        def index_blobs_array
          repo = repository_for_indexing
          target_sha = repo.head.target
          result = []

          repo.index.each do |entry|
            # LiteBlob is the wrapper defined in this file; the previously
            # referenced EasyBlob does not exist and raised NameError.
            blob = LiteBlob.new(repo, entry)
            next unless blob.text?

            result.push(
              id: "#{target_sha}_#{blob.path}",
              oid: blob.id,
              content: blob.data,
              commit_sha: target_sha
            )
          end

          result
        end

        # Array of hashes (sha, author, committer, message), one per commit
        # object found among the repository's object ids.
        def index_commits_array
          repo = repository_for_indexing
          res = []

          repo.each_id do |oid|
            obj = repo.lookup(oid)
            next unless obj.type == :commit

            res.push(
              sha: obj.oid,
              author: obj.author,
              committer: obj.committer,
              message: obj.message
            )
          end

          res
        end

        # Readers the indexing methods rely on. They were called but never
        # defined; they are only added when the including class has not
        # already provided its own.
        unless method_defined?(:path_to_repo) || private_method_defined?(:path_to_repo)
          # Path given to #repository_for_indexing, or nil if none was given yet.
          def path_to_repo
            @path_to_repo
          end
        end

        unless method_defined?(:repository_id) || private_method_defined?(:repository_id)
          # Id stored by #set_repository_id; falls back to the repository path.
          def repository_id
            @repository_id || path_to_repo
          end
        end

        # Set the id that tells documents of different repositories apart.
        # A nil id falls back to the repository path.
        def set_repository_id(id)
          @repository_id = id || path_to_repo
        end

        # Open the repository to index.
        #
        # repo_path - path of the git repository. It is remembered, so later
        #             calls may omit it. An already remembered non-empty path
        #             is never replaced.
        #
        # Returns a new Rugged::Repository on every call.
        def repository_for_indexing(repo_path = "")
          # A first call without a path used to pin "" for good, which made
          # every later call with a real path fail; only keep a stored path
          # once it is non-empty.
          @path_to_repo = repo_path if @path_to_repo.to_s.empty?
          Rugged::Repository.new(@path_to_repo)
        end

        # Memoized Elasticsearch client (request logging enabled).
        def client_for_indexing
          @client_for_indexing ||= Elasticsearch::Client.new log: true
        end
      end
    end

    # Lightweight blob wrapper built from one entry of a repository index.
    # Linguist::BlobHelper presumably supplies `text?` on top of the
    # name/path/data accessors - confirm against the linguist gem.
    class LiteBlob
      include Linguist::BlobHelper
      include EncodingHelper

      attr_accessor :id, :name, :path, :data, :commit_id

      # repo          - object responding to `lookup(oid)`
      # raw_blob_hash - index entry with at least :oid and :path
      def initialize(repo, raw_blob_hash)
        @id   = raw_blob_hash[:oid]
        @path = raw_blob_hash[:path]
        @name = @path.split("/").last
        @data = encode!(repo.lookup(@id).content)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Elasticsearch
  module Git
    # Release number of the elasticsearch-git gem.
    VERSION = "0.0.1"
  end
end
@@ -0,0 +1,21 @@
1
require "rubygems"
require 'bundler/setup'
require 'pry'

# Load every gem declared in the Gemfile.
Bundler.require

require 'wrong/adapters/minitest'

# Directory the tests were started from.
PROJECT_ROOT = Dir.pwd

Wrong.config.color

Minitest.autorun

# Base class for the gem's tests: Minitest::Test with Wrong mixed in.
class TestCase < Minitest::Test
  include Wrong

  # Absolute path of the "fixtures" directory next to this file (memoized).
  def fixtures_path
    @path ||= File.expand_path("../fixtures", __FILE__)
  end
end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elasticsearch-git
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrey Kumanyaev
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: elasticsearch-model
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: elasticsearch-api
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rugged
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: gitlab_git
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: activemodel
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: activesupport
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Elasticsearch integrations for git repositories.
98
+ email:
99
+ - me@zzet.org
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - .gitignore
105
+ - Gemfile
106
+ - LICENSE.txt
107
+ - README.md
108
+ - Rakefile
109
+ - elasticsearch-git.gemspec
110
+ - lib/elasticsearch/git.rb
111
+ - lib/elasticsearch/git/model.rb
112
+ - lib/elasticsearch/git/repository.rb
113
+ - lib/elasticsearch/git/version.rb
114
+ - lib/test/test_helper.rb
115
+ homepage: https://github.com/zzet/elasticsearch-git
116
+ licenses:
117
+ - MIT
118
+ metadata: {}
119
+ post_install_message:
120
+ rdoc_options: []
121
+ require_paths:
122
+ - lib
123
+ required_ruby_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project:
135
+ rubygems_version: 2.0.3
136
+ signing_key:
137
+ specification_version: 4
138
+ summary: Elasticsearch integrations for git repositories.
139
+ test_files: []