syphon 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +3 -2
- data/CHANGELOG +11 -0
- data/Gemfile +5 -3
- data/README.markdown +1 -62
- data/lib/syphon/builder.rb +8 -5
- data/lib/syphon/index.rb +21 -12
- data/lib/syphon/railtie.rb +1 -1
- data/lib/syphon/schema.rb +5 -0
- data/lib/syphon/version.rb +1 -1
- data/syphon.gemspec +1 -1
- data/test/syphon/test_builder.rb +19 -13
- data/test/syphon/test_index.rb +7 -0
- data/test/syphon/test_railtie.rb +8 -1
- data/test/test_helper.rb +18 -10
- metadata +18 -34
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f6b4117676c9b04cf839c499c0e1923b39defb30
|
4
|
+
data.tar.gz: c95e03409f0b86a8aa6c404f8ba1583a1e033758
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7b68fea96fc36d64e29698a4dd217bbadb20d8b6369de324a4f602aee900cb76f568afb312999ef1808c863379c0e541fadfa8eaa7650ece627dc433b43281dc
|
7
|
+
data.tar.gz: 472a520467282278d21b6c8b79610cc6eb807fe5a204d4744de479b34290427de5b8c746c353fa5322bc3c3b4a41bffb50e7f5092249459a3ada0e16a735d468
|
data/.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
/.bundle
|
2
|
+
/Gemfile.lock
|
3
|
+
/test/config.yml
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== 0.1.0 2014-03-18
|
2
|
+
|
3
|
+
* Clean up partially built index if building fails.
|
4
|
+
* Use an ENV_syphon database configuration in database.yml before falling back
|
5
|
+
to the standard one with Rails.
|
6
|
+
* Support ActiveSupport 4.
|
7
|
+
* Add :multi option for fields. This causes values to be indexed as arrays.
|
8
|
+
Arrays are no longer autodetected - fields which may be multivalued must be
|
9
|
+
declared as such, and will always be returned from ElasticSearch as an array
|
10
|
+
in ElasticSearch >= 1.0.
|
11
|
+
|
1
12
|
== 0.0.2 2013-12-12
|
2
13
|
|
3
14
|
* Add index_settings attribute to control sharding, replication, etc.
|
data/Gemfile
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
source 'https://rubygems.org'
|
2
2
|
gemspec
|
3
3
|
|
4
|
-
gem 'debugger',
|
5
|
-
gem '
|
4
|
+
gem 'debugger', platform: :ruby_19
|
5
|
+
gem 'byebug', platform: [:ruby_20, :ruby_21]
|
6
|
+
gem 'minitest', '< 5'
|
7
|
+
gem 'looksee'
|
6
8
|
gem 'rails'
|
7
9
|
gem 'ritual', '~> 0.4.0'
|
8
|
-
gem 'temporaries'
|
10
|
+
gem 'temporaries'
|
data/README.markdown
CHANGED
@@ -2,65 +2,4 @@
|
|
2
2
|
|
3
3
|
Syphon data from an Arel source into ElasticSearch.
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
gem install syphon
|
8
|
-
|
9
|
-
## Usage
|
10
|
-
|
11
|
-
class UsersIndex
|
12
|
-
include Syphon::Index
|
13
|
-
|
14
|
-
define_source do
|
15
|
-
# Define some attributes. Options like "index: :not_analyzed" are passed
|
16
|
-
# to elasticsearch.
|
17
|
-
|
18
|
-
string :login, index: :not_analyzed
|
19
|
-
string :name
|
20
|
-
integer :age
|
21
|
-
geo_point :location
|
22
|
-
string :bio
|
23
|
-
nested_documents do
|
24
|
-
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
## Why?
|
30
|
-
|
31
|
-
In 2 words: fast indexing.
|
32
|
-
|
33
|
-
There is no shortage of ElasticSearch adapters for ruby. The issue is that they
|
34
|
-
typically work by defining fields as attributes (methods) on your model
|
35
|
-
instances, which means to index a lot of data, you need to roundtrip your data
|
36
|
-
through heavy ruby models, which can be quite slow, particularly if your models
|
37
|
-
are made of ActiveRecord.
|
38
|
-
|
39
|
-
Syphon makes no such assumption. You define your fields to index using SQL
|
40
|
-
expressions. These are assembled into a single SQL query, and the resulting rows
|
41
|
-
are used to build ElasticSearch documents in an intuitive way using minimal ruby
|
42
|
-
data structures. Those coming from ThinkingSphinx might find this notion -- and
|
43
|
-
syntax -- familiar.
|
44
|
-
|
45
|
-
Syphon is similar in concept to an ElasticSearch river, except it's completely
|
46
|
-
handled off the server. In fact it was originally implemented using the
|
47
|
-
[JDBC river][jdbc-river], but the current incarnation was found to be a little
|
48
|
-
lacking in functionality for a smooth integration (such as documents with
|
49
|
-
multiple nested fields, and synchronous updates for testing).
|
50
|
-
|
51
|
-
Syphon focuses on the indexing aspect only - you are free to choose another gem
|
52
|
-
for the query DSL.
|
53
|
-
|
54
|
-
[jdbc-river]: https://github.com/jprante/elasticsearch-river-jdbc
|
55
|
-
|
56
|
-
## Contributing
|
57
|
-
|
58
|
-
* [Bug reports](https://github.com/howaboutwe/syphon/issues)
|
59
|
-
* [Source](https://github.com/howaboutwe/syphon)
|
60
|
-
* Patches: Fork on Github, send pull request.
|
61
|
-
* Include tests where practical.
|
62
|
-
* Leave the version alone, or bump it in a separate commit.
|
63
|
-
|
64
|
-
## Copyright
|
65
|
-
|
66
|
-
Copyright (c) George Ogata. See LICENSE for details.
|
5
|
+
### Work in progress
|
data/lib/syphon/builder.rb
CHANGED
@@ -26,14 +26,17 @@ module Syphon
|
|
26
26
|
def add_to_document(document, row, schema = self.schema, index = 0)
|
27
27
|
schema.fields.each do |name, field|
|
28
28
|
if field.is_a?(Schema::NestedField)
|
29
|
-
|
30
|
-
index = add_to_document(
|
31
|
-
document[field.name] = combine(document[field.name], nested_doc)
|
32
|
-
index
|
29
|
+
value = {}
|
30
|
+
index = add_to_document(value, row, field.nested_schema, index)
|
33
31
|
else
|
34
|
-
|
32
|
+
value = row[index]
|
35
33
|
index += 1
|
36
34
|
end
|
35
|
+
if field.multi?
|
36
|
+
(document[field.name] ||= []) << value
|
37
|
+
else
|
38
|
+
document[field.name] = value
|
39
|
+
end
|
37
40
|
end
|
38
41
|
index
|
39
42
|
end
|
data/lib/syphon/index.rb
CHANGED
@@ -48,19 +48,28 @@ module Syphon
|
|
48
48
|
old_internal_name = internal_index_name
|
49
49
|
new_internal_name = new_internal_index_name(index_name)
|
50
50
|
|
51
|
-
|
52
|
-
|
53
|
-
body
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
51
|
+
made_it = false
|
52
|
+
begin
|
53
|
+
client.indices.create(index: new_internal_name, body: {settings: index_settings})
|
54
|
+
sources.each do |name, source|
|
55
|
+
body = source.mapping
|
56
|
+
client.indices.put_mapping(index: new_internal_name, type: source.type, body: body)
|
57
|
+
source.import(index: new_internal_name) unless options[:schema_only]
|
58
|
+
end
|
59
59
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
60
|
+
warmups.each { |w| w.call(new_internal_name) }
|
61
|
+
|
62
|
+
remove = {remove: {index: old_internal_name, alias: index_name}} if old_internal_name
|
63
|
+
add = {add: {index: new_internal_name, alias: index_name}}
|
64
|
+
client.indices.update_aliases body: {actions: [remove, add].compact}
|
65
|
+
made_it = true
|
66
|
+
ensure
|
67
|
+
if made_it
|
68
|
+
client.indices.delete(index: old_internal_name) if old_internal_name
|
69
|
+
else
|
70
|
+
client.indices.delete(index: new_internal_name) if new_internal_name
|
71
|
+
end
|
72
|
+
end
|
64
73
|
end
|
65
74
|
|
66
75
|
def destroy
|
data/lib/syphon/railtie.rb
CHANGED
@@ -33,7 +33,7 @@ module Syphon
|
|
33
33
|
config ||= {}
|
34
34
|
config.symbolize_keys!
|
35
35
|
config[:log] = normalize_log(env, root, config[:log])
|
36
|
-
config[:database] ||= dbconfig[env].dup
|
36
|
+
config[:database] ||= (dbconfig["#{env}_syphon"] || dbconfig[env]).dup
|
37
37
|
config[:index_namespace] ||= "#{app_name.underscore}_#{env}"
|
38
38
|
config[:database].try(:symbolize_keys!)
|
39
39
|
config[:elasticsearch].try(:symbolize_keys!)
|
data/lib/syphon/schema.rb
CHANGED
@@ -90,10 +90,15 @@ module Syphon
|
|
90
90
|
@type = type
|
91
91
|
@expression = expression
|
92
92
|
@properties = options.merge(type: type)
|
93
|
+
@multi = options[:multi]
|
93
94
|
end
|
94
95
|
|
95
96
|
attr_reader :schema, :name, :type, :expression, :properties
|
96
97
|
|
98
|
+
def multi?
|
99
|
+
@multi
|
100
|
+
end
|
101
|
+
|
97
102
|
def select(outer = nil)
|
98
103
|
name = outer ? "#{outer}[#{self.name}]" : self.name
|
99
104
|
"#{schema.send(:query_fragment, expression)} AS `#{name}`"
|
data/lib/syphon/version.rb
CHANGED
data/syphon.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
16
16
|
|
17
17
|
gem.add_dependency 'elasticsearch', '~> 0.4.0'
|
18
|
-
gem.add_dependency 'activesupport', '
|
18
|
+
gem.add_dependency 'activesupport', '< 5'
|
19
19
|
gem.add_dependency 'mysql2', '~> 0.3.12'
|
20
20
|
|
21
21
|
gem.add_development_dependency 'bundler'
|
data/test/syphon/test_builder.rb
CHANGED
@@ -30,37 +30,43 @@ describe Syphon::Builder do
|
|
30
30
|
]
|
31
31
|
end
|
32
32
|
|
33
|
-
it "
|
33
|
+
it "replaces content from subsequent rows with the same root id for singular fields" do
|
34
34
|
schema = Syphon::Schema.new do
|
35
35
|
integer :id, 0
|
36
36
|
string :name, 'x'
|
37
37
|
end
|
38
38
|
results = [[1, 'one'], [1, 'two']]
|
39
|
+
Syphon::Builder.new(results, schema).to_a.
|
40
|
+
must_equal [{id: 1, name: 'two'}]
|
41
|
+
end
|
42
|
+
|
43
|
+
it "combines content from subsequent rows with the same root id for multi fields" do
|
44
|
+
schema = Syphon::Schema.new do
|
45
|
+
integer :id, 0
|
46
|
+
string :name, 'x', multi: true
|
47
|
+
end
|
48
|
+
results = [[1, 'one'], [1, 'two']]
|
39
49
|
Syphon::Builder.new(results, schema).to_a.
|
40
50
|
must_equal [{id: 1, name: ['one', 'two']}]
|
41
51
|
end
|
42
52
|
|
43
|
-
it "
|
53
|
+
it "replaces content from subsequent rows for singular nested fields" do
|
44
54
|
schema = Syphon::Schema.new do
|
45
55
|
integer :id, 0
|
46
56
|
nested :nested1 do
|
47
|
-
integer :
|
48
|
-
|
49
|
-
nested :nested2 do
|
50
|
-
integer :a, 'x'
|
57
|
+
integer :id, 0
|
58
|
+
integer :name, 'x'
|
51
59
|
end
|
52
60
|
end
|
53
|
-
results = [[1,
|
54
|
-
Syphon::Builder.new(results, schema).to_a.
|
55
|
-
{id: 1, nested1: {
|
56
|
-
{id: 2, nested1: {a: 20}, nested2: {a: 21}},
|
57
|
-
]
|
61
|
+
results = [[1, 2, 'a'], [1, 3, 'b']]
|
62
|
+
Syphon::Builder.new(results, schema).to_a.
|
63
|
+
must_equal [{id: 1, nested1: {id: 3, name: 'b'}}]
|
58
64
|
end
|
59
65
|
|
60
|
-
it "
|
66
|
+
it "replaces content from subsequent rows for multi nested fields" do
|
61
67
|
schema = Syphon::Schema.new do
|
62
68
|
integer :id, 0
|
63
|
-
nested :nested1 do
|
69
|
+
nested :nested1, multi: true do
|
64
70
|
integer :id, 0
|
65
71
|
integer :name, 'x'
|
66
72
|
end
|
data/test/syphon/test_index.rb
CHANGED
@@ -106,6 +106,13 @@ describe Syphon::Index do
|
|
106
106
|
hits.map { |doc| doc['_source']['login'] }.must_equal ['bob']
|
107
107
|
end
|
108
108
|
|
109
|
+
it "deletes the index if building fails" do
|
110
|
+
indices = TestIndex.client.indices.status['indices'].keys.to_set
|
111
|
+
TestIndex.source.instance_eval { def import(*); raise 'fubar'; end }
|
112
|
+
-> { TestIndex.build }.must_raise(RuntimeError)
|
113
|
+
TestIndex.client.indices.status['indices'].keys.to_set.must_equal indices
|
114
|
+
end
|
115
|
+
|
109
116
|
it "passes configured index settings" do
|
110
117
|
TestIndex.index_settings = {number_of_shards: 23}
|
111
118
|
TestIndex.build
|
data/test/syphon/test_railtie.rb
CHANGED
@@ -81,7 +81,14 @@ describe Syphon::Railtie do
|
|
81
81
|
Syphon.database_configuration.must_equal({database: 'mydb'})
|
82
82
|
end
|
83
83
|
|
84
|
-
it "defaults to the
|
84
|
+
it "defaults to a configuration for syphon in the current environment" do
|
85
|
+
write_config('test' => {})
|
86
|
+
params[:dbconfig] = {'test_syphon' => {database: 'syphondb'}, 'test' => {database: 'ardb'}}
|
87
|
+
Syphon::Railtie.set_configuration(params)
|
88
|
+
Syphon.database_configuration.must_equal({database: 'syphondb'})
|
89
|
+
end
|
90
|
+
|
91
|
+
it "defaults to the primary ActiveRecord configuration otherwise" do
|
85
92
|
write_config('test' => {})
|
86
93
|
Syphon::Railtie.set_configuration(params)
|
87
94
|
Syphon.database_configuration.must_equal({database: 'ardb'})
|
data/test/test_helper.rb
CHANGED
@@ -5,7 +5,7 @@ require 'minitest/spec'
|
|
5
5
|
require 'yaml'
|
6
6
|
require 'fileutils'
|
7
7
|
require 'temporaries'
|
8
|
-
require '
|
8
|
+
require 'byebug'
|
9
9
|
require 'looksee'
|
10
10
|
require 'rails'
|
11
11
|
|
@@ -20,21 +20,29 @@ MiniTest::Spec.class_eval do
|
|
20
20
|
def self.uses_users_table
|
21
21
|
let(:db) { Syphon.database_connection }
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
db.query "CREATE TABLE IF NOT EXISTS users(#{columns})"
|
26
|
-
end
|
23
|
+
include Module.new {
|
24
|
+
extend MiniTest::Spec::DSL
|
27
25
|
|
28
|
-
|
29
|
-
|
30
|
-
|
26
|
+
before do
|
27
|
+
columns = "id int auto_increment PRIMARY KEY, login VARCHAR(20)"
|
28
|
+
db.query "CREATE TABLE IF NOT EXISTS users(#{columns})"
|
29
|
+
end
|
30
|
+
|
31
|
+
after do
|
32
|
+
db.query "DROP TABLE IF EXISTS users"
|
33
|
+
end
|
34
|
+
}
|
31
35
|
end
|
32
36
|
|
33
37
|
def self.uses_elasticsearch
|
34
38
|
let(:client) { Syphon.client }
|
35
39
|
|
36
|
-
|
37
|
-
|
40
|
+
include Module.new {
|
41
|
+
extend MiniTest::Spec::DSL
|
42
|
+
|
43
|
+
before { clear_indices }
|
44
|
+
after { clear_indices }
|
45
|
+
}
|
38
46
|
end
|
39
47
|
|
40
48
|
def clear_indices
|
metadata
CHANGED
@@ -1,78 +1,69 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: syphon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- George Ogata
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-03-18 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: elasticsearch
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 0.4.0
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 0.4.0
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: activesupport
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - "<"
|
36
32
|
- !ruby/object:Gem::Version
|
37
|
-
version:
|
33
|
+
version: '5'
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - "<"
|
44
39
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
40
|
+
version: '5'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: mysql2
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- - ~>
|
45
|
+
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: 0.3.12
|
54
48
|
type: :runtime
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- - ~>
|
52
|
+
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: 0.3.12
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: bundler
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- -
|
59
|
+
- - ">="
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: '0'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- -
|
66
|
+
- - ">="
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0'
|
78
69
|
description: Syphon data from an Arel source into ElasticSearch
|
@@ -82,7 +73,7 @@ executables: []
|
|
82
73
|
extensions: []
|
83
74
|
extra_rdoc_files: []
|
84
75
|
files:
|
85
|
-
- .gitignore
|
76
|
+
- ".gitignore"
|
86
77
|
- CHANGELOG
|
87
78
|
- Gemfile
|
88
79
|
- LICENSE
|
@@ -107,33 +98,26 @@ files:
|
|
107
98
|
- test/test_syphon.rb
|
108
99
|
homepage: https://github.com/howaboutwe/syphon
|
109
100
|
licenses: []
|
101
|
+
metadata: {}
|
110
102
|
post_install_message:
|
111
103
|
rdoc_options: []
|
112
104
|
require_paths:
|
113
105
|
- lib
|
114
106
|
required_ruby_version: !ruby/object:Gem::Requirement
|
115
|
-
none: false
|
116
107
|
requirements:
|
117
|
-
- -
|
108
|
+
- - ">="
|
118
109
|
- !ruby/object:Gem::Version
|
119
110
|
version: '0'
|
120
|
-
segments:
|
121
|
-
- 0
|
122
|
-
hash: -2157484304572429612
|
123
111
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
|
-
none: false
|
125
112
|
requirements:
|
126
|
-
- -
|
113
|
+
- - ">="
|
127
114
|
- !ruby/object:Gem::Version
|
128
115
|
version: '0'
|
129
|
-
segments:
|
130
|
-
- 0
|
131
|
-
hash: -2157484304572429612
|
132
116
|
requirements: []
|
133
117
|
rubyforge_project:
|
134
|
-
rubygems_version:
|
118
|
+
rubygems_version: 2.2.2
|
135
119
|
signing_key:
|
136
|
-
specification_version:
|
120
|
+
specification_version: 4
|
137
121
|
summary: Syphon data from an Arel source into ElasticSearch
|
138
122
|
test_files:
|
139
123
|
- test/config.yml.sample
|