elasticsearch-git 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1a7d0d063702f3bcc2f71f5a8ad9b55ec0377280
4
+ data.tar.gz: 656199334b4345491eddb15fd3f4e3ffd0a0b597
5
+ SHA512:
6
+ metadata.gz: bf49c347091bb8866356b74bed98b9652b44474d7e5e5341257f06bf759a0eaf48efdf46ccdafa4211a2a7d3e98d390f33ff3b59900a3f7bc737c1020b25be75
7
+ data.tar.gz: 99870c02c0532ed6f72b70ee7e962c778148d7bb794a73f20eebd0cafa74e859baf1fbc1d3443fcb90b87280d06018f40f43d8299651a7413985588b218f2c6e
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in elasticsearch-git.gemspec
4
+ gemspec
5
+
6
+ gem 'elasticsearch-model', github: 'elasticsearch/elasticsearch-rails'
7
+ gem 'elasticsearch-api', git: 'git://github.com/elasticsearch/elasticsearch-ruby.git'
8
+ gem 'rugged', git: 'git://github.com/libgit2/rugged.git', branch: 'development', submodules: true
9
+ gem 'bundler'
10
+ gem 'rake'
11
+ gem 'pry'
12
+ gem 'gitlab_git'
13
+ gem 'minitest'
14
+ gem 'activesupport', '> 4.0.0'
15
+ gem 'activemodel', '> 4.0.0'
16
+ gem 'wrong'
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Andrey Kumanyaev
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,57 @@
1
+ # Elasticsearch::Git
2
+
3
+ Attention: this is pre-pre-pre-beta code. It is not ready for production use.
4
+
5
+ [Elasticsearch](https://github.com/elasticsearch/elasticsearch-rails/tree/master/elasticsearch-model) integrations for git repositories
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ``` ruby
12
+ gem 'elasticsearch-git'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ ``` bash
18
+ $ bundle
19
+ ```
20
+
21
+ Or install it yourself as:
22
+
23
+ ``` bash
24
+ $ gem install elasticsearch-git
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ``` ruby
30
+ class Repository
31
+ include Elasticsearch::Git::Repository
32
+
33
+ set_repository_id project.id
34
+ repository_for_indexing '/path/to/your/repo'
35
+
36
+ end
37
+
38
+ Repository.__elasticsearch__.create_index! force: true
39
+
40
+ repo = Repository.new
41
+ repo.index_commits
42
+ repo.index_blobs
43
+
44
+ Repository.search("query", type: 'blob')
45
+ Repository.search("query", type: 'commit')
46
+
47
+ # Search in all types
48
+ Repository.search("query")
49
+ ```
50
+
51
+ ## Contributing
52
+
53
+ 1. Fork it ( http://github.com/[my-github-username]/elasticsearch-git/fork )
54
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
55
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
56
+ 4. Push to the branch (`git push origin my-new-feature`)
57
+ 5. Create a new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'elasticsearch/git/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "elasticsearch-git"
8
+ spec.version = Elasticsearch::Git::VERSION
9
+ spec.authors = ["Andrey Kumanyaev"]
10
+ spec.email = ["me@zzet.org"]
11
+ spec.summary = %q{Elasticsearch integrations for git repositories.}
12
+ spec.description = %q{Elasticsearch integrations for git repositories.}
13
+ spec.homepage = "https://github.com/zzet/elasticsearch-git"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency 'elasticsearch-model'
22
+ spec.add_runtime_dependency 'elasticsearch-api'
23
+ spec.add_runtime_dependency 'rugged'
24
+ spec.add_runtime_dependency 'gitlab_git'
25
+ spec.add_runtime_dependency 'activemodel'
26
+ spec.add_runtime_dependency 'activesupport'
27
+ end
@@ -0,0 +1,12 @@
1
+ require "elasticsearch/git/version"
2
+ require "elasticsearch/git/model"
3
+ require "elasticsearch/git/commit"
4
+
5
+ module Elasticsearch
6
+ module Git
7
+ class Test
8
+ include Elasticsearch::Git::Model
9
+ end
10
+ end
11
+ end
12
+
@@ -0,0 +1,68 @@
1
+ require 'active_support/concern'
2
+ require 'active_model'
3
+ require 'elasticsearch/model'
4
+
5
+ module Elasticsearch
6
+ module Git
7
+ module Model
8
+ extend ActiveSupport::Concern
9
+
10
+ included do
11
+ extend ActiveModel::Naming
12
+ include ActiveModel::Model
13
+ include Elasticsearch::Model
14
+
15
+ index_name [self.name.downcase, 'index', Rails.env.to_s].join('-')
16
+
17
+ settings \
18
+ index: {
19
+ analysis: {
20
+ analyzer: {
21
+ human_analyzer: {
22
+ type: 'custom',
23
+ tokenizer: 'human_tokenizer',
24
+ filter: %w(lowercase asciifolding human_ngrams)
25
+ },
26
+ sha_analyzer: {
27
+ type: 'custom',
28
+ tokenizer: 'sha_tokenizer',
29
+ filter: %w(lowercase asciifolding sha_ngrams)
30
+ },
31
+ code_analyzer: {
32
+ type: 'custom',
33
+ tokenizer: 'standard',
34
+ filter: %w(lowercase asciifolding)
35
+ }
36
+ },
37
+ tokenizer: {
38
+ sha_tokenizer: {
39
+ type: "NGram",
40
+ min_gram: 8,
41
+ max_gram: 40,
42
+ token_chars: %w(letter digit)
43
+ },
44
+ human_tokenizer: {
45
+ type: "NGram",
46
+ min_gram: 1,
47
+ max_gram: 20,
48
+ token_chars: %w(letter digit)
49
+ }
50
+ },
51
+ filter: {
52
+ human_ngrams: {
53
+ type: "NGram",
54
+ min_gram: 1,
55
+ max_gram: 20
56
+ },
57
+ sha_ngrams: {
58
+ type: "NGram",
59
+ min_gram: 8,
60
+ max_gram: 40
61
+ }
62
+ }
63
+ }
64
+ }
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,200 @@
1
+ require 'active_support/concern'
2
+ require 'active_model'
3
+ require 'elasticsearch'
4
+ require 'elasticsearch/model'
5
+ require 'rugged'
6
+ require 'gitlab_git'
7
+
8
+ module Elasticsearch
9
+ module Git
10
+ module Repository
11
+ extend ActiveSupport::Concern
12
+
13
+ included do
14
+ include Elasticsearch::Git::Model
15
+
16
+ mapping do
17
+ indexes :blobs do
18
+ indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
19
+ indexes :oid, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
20
+ indexes :commit_sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
21
+ indexes :content, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
22
+ end
23
+ indexes :commits do
24
+ indexes :id, type: :string, index_options: 'offsets', search_analyzer: :human_analyzer, index_analyzer: :human_analyzer
25
+ indexes :sha, type: :string, index_options: 'offsets', search_analyzer: :sha_analyzer, index_analyzer: :sha_analyzer
26
+ indexes :author do
27
+ indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
28
+ indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
29
+ indexes :time, type: :date
30
+ end
31
+ indexes :commiter do
32
+ indexes :name, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
33
+ indexes :email, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
34
+ indexes :time, type: :date
35
+ end
36
+ indexes :message, type: :string, index_options: 'offsets', search_analyzer: :code_analyzer, index_analyzer: :human_analyzer
37
+ end
38
+ end
39
+
40
+ # Indexing all text-like blobs in repository
41
+ #
42
+ # All data stored in global index
43
+ # Repository can be selected by 'rid' field
44
+ # If you want - this field can be used for store 'project' id
45
+ #
46
+ # blob {
47
+ # id - uniq id of blob from all repositories
48
+ # oid - blob id in repository
49
+ # content - blob content
50
+ # commit_sha - last actual commit sha
51
+ # }
52
+ #
53
+ # For search from blobs use type 'blob'
54
+ def index_blobs
55
+ target_sha = repository_for_indexing.head.target
56
+ repository_for_indexing.index.each do |blob|
57
+ b = LiteBlob.new(repository_for_indexing, blob)
58
+ if b.text?
59
+ client_for_indexing.index \
60
+ index: "#{self.class.index_name}",
61
+ type: "blob",
62
+ id: "#{repository_id}_#{b.path}",
63
+ body: {
64
+ blob: {
65
+ oid: b.id,
66
+ rid: repository_id,
67
+ content: b.data,
68
+ commit_sha: target_sha
69
+ }
70
+ }
71
+ end
72
+ end
73
+ end
74
+
75
+ # Indexing all commits in repository
76
+ #
77
+ # All data stored in global index
78
+ # Repository can be filtered by 'rid' field
79
+ # If you want - this field can be used git store 'project' id
80
+ #
81
+ # commit {
82
+ # sha - commit sha
83
+ # author {
84
+ # name - commit author name
85
+ # email - commit author email
86
+ # time - commit time
87
+ # }
88
+ # commiter {
89
+ # name - committer name
90
+ # email - committer email
91
+ # time - commit time
92
+ # }
93
+ # message - commit message
94
+ # }
95
+ #
96
+ # For search from commits use type 'commit'
97
+ def index_commits
98
+ repository_for_indexing.each_id do |oid|
99
+ obj = repository_for_indexing.lookup(oid)
100
+ if obj.type == :commit
101
+ client_for_indexing.index \
102
+ index: "#{self.class.index_name}",
103
+ type: "commit",
104
+ id: "#{repository_id}_#{obj.oid}",
105
+ body: {
106
+ commit: {
107
+ rid: repository_id,
108
+ sha: obj.oid,
109
+ author: obj.author,
110
+ committer: obj.committer,
111
+ message: obj.message
112
+ }
113
+ }
114
+ end
115
+ end
116
+ end
117
+
118
+ # Representation of repository as indexed json
119
+ # Attention: It can be very very very huge hash
120
+ def as_indexed_json(options = {})
121
+ ij = {}
122
+ ij[:blobs] = index_blobs_array
123
+ ij[:commits] = index_commits_array
124
+ ij
125
+ end
126
+
127
+ # Indexing blob from current index
128
+ def index_blobs_array
129
+ result = []
130
+
131
+ target_sha = repository_for_indexing.head.target
132
+ repository_for_indexing.index.each do |blob|
133
+ b = EasyBlob.new(repository_for_indexing, blob)
134
+ result.push(
135
+ {
136
+ id: "#{target_sha}_#{b.path}",
137
+ oid: b.id,
138
+ content: b.data,
139
+ commit_sha: target_sha
140
+ }
141
+ ) if b.text?
142
+ end
143
+
144
+ result
145
+ end
146
+
147
+ # Lookup all object ids for commit objects
148
+ def index_commits_array
149
+ res = []
150
+
151
+ repository_for_indexing.each_id do |oid|
152
+ obj = repository_for_indexing.lookup(oid)
153
+ if obj.type == :commit
154
+ res.push(
155
+ {
156
+ sha: obj.oid,
157
+ author: obj.author,
158
+ committer: obj.committer,
159
+ message: obj.message
160
+ }
161
+ )
162
+ end
163
+ end
164
+
165
+ res
166
+ end
167
+
168
+ # Repository id used for identity data from different repositories
169
+ # Update this value if need
170
+ def set_repository_id id
171
+ @repository_id = id || path_to_repo
172
+ end
173
+
174
+ def repository_for_indexing(repo_path = "")
175
+ @path_to_repo ||= repo_path
176
+ Rugged::Repository.new(@path_to_repo)
177
+ end
178
+
179
+ def client_for_indexing # Elasticsearch client used by index_blobs and index_commits
180
+ @client_for_indexing ||= Elasticsearch::Client.new log: true # created on first use, then reused
181
+ end
182
+
183
+ end
184
+ end
185
+
186
+ class LiteBlob # Lightweight blob wrapper built from a raw index entry (a hash with :oid and :path)
187
+ include Linguist::BlobHelper # NOTE(review): presumably supplies text? used by the indexing methods — confirm
188
+ include EncodingHelper # NOTE(review): presumably supplies encode! used below — confirm
189
+
190
+ attr_accessor :id, :name, :path, :data, :commit_id # commit_id is never assigned in this class
191
+
192
+ def initialize(repo, raw_blob_hash)
193
+ @id = raw_blob_hash[:oid]
194
+ @path = raw_blob_hash[:path]
195
+ @name = @path.split("/").last # file name = last path segment
196
+ @data = encode!(repo.lookup(@id).content) # content of the object looked up by oid in repo
197
+ end
198
+ end
199
+ end
200
+ end
@@ -0,0 +1,5 @@
1
+ module Elasticsearch
2
+ module Git
3
+ VERSION = "0.0.1" # gem version; read by elasticsearch-git.gemspec for spec.version
4
+ end
5
+ end
@@ -0,0 +1,21 @@
1
+ require "rubygems"
2
+ require 'bundler/setup'
3
+ require 'pry'
4
+
5
+ Bundler.require
6
+
7
+ require 'wrong/adapters/minitest'
8
+
9
+ PROJECT_ROOT = File.join(Dir.pwd)
10
+
11
+ Wrong.config.color
12
+
13
+ Minitest.autorun
14
+
15
+ class TestCase < Minitest::Test
16
+ include Wrong
17
+
18
+ def fixtures_path
19
+ @path ||= File.expand_path(File.join(__FILE__, "../fixtures"))
20
+ end
21
+ end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elasticsearch-git
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrey Kumanyaev
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: elasticsearch-model
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: elasticsearch-api
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rugged
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: gitlab_git
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: activemodel
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: activesupport
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Elasticsearch integrations for git repositories.
98
+ email:
99
+ - me@zzet.org
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - .gitignore
105
+ - Gemfile
106
+ - LICENSE.txt
107
+ - README.md
108
+ - Rakefile
109
+ - elasticsearch-git.gemspec
110
+ - lib/elasticsearch/git.rb
111
+ - lib/elasticsearch/git/model.rb
112
+ - lib/elasticsearch/git/repository.rb
113
+ - lib/elasticsearch/git/version.rb
114
+ - lib/test/test_helper.rb
115
+ homepage: https://github.com/zzet/elasticsearch-git
116
+ licenses:
117
+ - MIT
118
+ metadata: {}
119
+ post_install_message:
120
+ rdoc_options: []
121
+ require_paths:
122
+ - lib
123
+ required_ruby_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project:
135
+ rubygems_version: 2.0.3
136
+ signing_key:
137
+ specification_version: 4
138
+ summary: Elasticsearch integrations for git repositories.
139
+ test_files: []