syphon 0.0.2 → 0.1.0
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
- checksums.yaml +7 -0
- data/.gitignore +3 -2
- data/CHANGELOG +11 -0
- data/Gemfile +5 -3
- data/README.markdown +1 -62
- data/lib/syphon/builder.rb +8 -5
- data/lib/syphon/index.rb +21 -12
- data/lib/syphon/railtie.rb +1 -1
- data/lib/syphon/schema.rb +5 -0
- data/lib/syphon/version.rb +1 -1
- data/syphon.gemspec +1 -1
- data/test/syphon/test_builder.rb +19 -13
- data/test/syphon/test_index.rb +7 -0
- data/test/syphon/test_railtie.rb +8 -1
- data/test/test_helper.rb +18 -10
- metadata +18 -34
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f6b4117676c9b04cf839c499c0e1923b39defb30
|
4
|
+
data.tar.gz: c95e03409f0b86a8aa6c404f8ba1583a1e033758
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7b68fea96fc36d64e29698a4dd217bbadb20d8b6369de324a4f602aee900cb76f568afb312999ef1808c863379c0e541fadfa8eaa7650ece627dc433b43281dc
|
7
|
+
data.tar.gz: 472a520467282278d21b6c8b79610cc6eb807fe5a204d4744de479b34290427de5b8c746c353fa5322bc3c3b4a41bffb50e7f5092249459a3ada0e16a735d468
|
data/.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
/.bundle
|
2
|
+
/Gemfile.lock
|
3
|
+
/test/config.yml
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== 0.1.0 2014-03-18
|
2
|
+
|
3
|
+
* Clean up partially built index if building fails.
|
4
|
+
* Use an ENV_syphon database configuration in database.yml before falling back
|
5
|
+
to the standard one with Rails.
|
6
|
+
* Support ActiveSupport 4.
|
7
|
+
* Add :multi option for fields. This causes values to be indexed as arrays.
|
8
|
+
Arrays are no longer autodetected - fields which may be multivalued must be
|
9
|
+
declared as such, and will always be returned from ElasticSearch as an array
|
10
|
+
in ElasticSearch >= 1.0.
|
11
|
+
|
1
12
|
== 0.0.2 2013-12-12
|
2
13
|
|
3
14
|
* Add index_settings attribute to control sharding, replication, etc.
|
data/Gemfile
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
source 'https://rubygems.org'
|
2
2
|
gemspec
|
3
3
|
|
4
|
-
gem 'debugger',
|
5
|
-
gem '
|
4
|
+
gem 'debugger', platform: :ruby_19
|
5
|
+
gem 'byebug', platform: [:ruby_20, :ruby_21]
|
6
|
+
gem 'minitest', '< 5'
|
7
|
+
gem 'looksee'
|
6
8
|
gem 'rails'
|
7
9
|
gem 'ritual', '~> 0.4.0'
|
8
|
-
gem 'temporaries'
|
10
|
+
gem 'temporaries'
|
data/README.markdown
CHANGED
@@ -2,65 +2,4 @@
|
|
2
2
|
|
3
3
|
Syphon data from an Arel source into ElasticSearch.
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
gem install syphon
|
8
|
-
|
9
|
-
## Usage
|
10
|
-
|
11
|
-
class UsersIndex
|
12
|
-
include Syphon::Index
|
13
|
-
|
14
|
-
define_source do
|
15
|
-
# Define some attributes. Options like "index: :not_analyzed" are passed
|
16
|
-
# to elasticsearch.
|
17
|
-
|
18
|
-
string :login, index: :not_analyzed
|
19
|
-
string :name
|
20
|
-
integer :age
|
21
|
-
geo_point :location
|
22
|
-
string :bio
|
23
|
-
nested_documents do
|
24
|
-
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
## Why?
|
30
|
-
|
31
|
-
In 2 words: fast indexing.
|
32
|
-
|
33
|
-
There is no shortage of ElasticSearch adapters for ruby. The issue is that they
|
34
|
-
typically work by defining fields as attributes (methods) on your model
|
35
|
-
instances, which means to index a lot of data, you need to roundtrip your data
|
36
|
-
through heavy ruby models, which can be quite slow, particularly if your models
|
37
|
-
are made of ActiveRecord.
|
38
|
-
|
39
|
-
Syphon makes no such assumption. You define your fields to index using SQL
|
40
|
-
expressions. These are assembled into a single SQL query, and the resulting rows
|
41
|
-
are used to build ElasticSearch documents in an intuitive way using minimal ruby
|
42
|
-
data structures. Those coming from ThinkingSphinx might find this notion -- and
|
43
|
-
syntax -- familiar.
|
44
|
-
|
45
|
-
Syphon is similar in concept to an ElasticSearch river, except it's completely
|
46
|
-
handled off the server. In fact it was originally implemented using the
|
47
|
-
[JDBC river][jdbc-river], but the current incarnation was found to be a little
|
48
|
-
lacking in functionality for a smooth integration (such as documents with
|
49
|
-
multiple nested fields, and synchronous updates for testing).
|
50
|
-
|
51
|
-
Syphon focuses on the indexing aspect only - you are free to choose another gem
|
52
|
-
for the query DSL.
|
53
|
-
|
54
|
-
[jdbc-river]: https://github.com/jprante/elasticsearch-river-jdbc
|
55
|
-
|
56
|
-
## Contributing
|
57
|
-
|
58
|
-
* [Bug reports](https://github.com/howaboutwe/syphon/issues)
|
59
|
-
* [Source](https://github.com/howaboutwe/syphon)
|
60
|
-
* Patches: Fork on Github, send pull request.
|
61
|
-
* Include tests where practical.
|
62
|
-
* Leave the version alone, or bump it in a separate commit.
|
63
|
-
|
64
|
-
## Copyright
|
65
|
-
|
66
|
-
Copyright (c) George Ogata. See LICENSE for details.
|
5
|
+
### Work in progress
|
data/lib/syphon/builder.rb
CHANGED
@@ -26,14 +26,17 @@ module Syphon
|
|
26
26
|
def add_to_document(document, row, schema = self.schema, index = 0)
|
27
27
|
schema.fields.each do |name, field|
|
28
28
|
if field.is_a?(Schema::NestedField)
|
29
|
-
|
30
|
-
index = add_to_document(
|
31
|
-
document[field.name] = combine(document[field.name], nested_doc)
|
32
|
-
index
|
29
|
+
value = {}
|
30
|
+
index = add_to_document(value, row, field.nested_schema, index)
|
33
31
|
else
|
34
|
-
|
32
|
+
value = row[index]
|
35
33
|
index += 1
|
36
34
|
end
|
35
|
+
if field.multi?
|
36
|
+
(document[field.name] ||= []) << value
|
37
|
+
else
|
38
|
+
document[field.name] = value
|
39
|
+
end
|
37
40
|
end
|
38
41
|
index
|
39
42
|
end
|
data/lib/syphon/index.rb
CHANGED
@@ -48,19 +48,28 @@ module Syphon
|
|
48
48
|
old_internal_name = internal_index_name
|
49
49
|
new_internal_name = new_internal_index_name(index_name)
|
50
50
|
|
51
|
-
|
52
|
-
|
53
|
-
body
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
51
|
+
made_it = false
|
52
|
+
begin
|
53
|
+
client.indices.create(index: new_internal_name, body: {settings: index_settings})
|
54
|
+
sources.each do |name, source|
|
55
|
+
body = source.mapping
|
56
|
+
client.indices.put_mapping(index: new_internal_name, type: source.type, body: body)
|
57
|
+
source.import(index: new_internal_name) unless options[:schema_only]
|
58
|
+
end
|
59
59
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
60
|
+
warmups.each { |w| w.call(new_internal_name) }
|
61
|
+
|
62
|
+
remove = {remove: {index: old_internal_name, alias: index_name}} if old_internal_name
|
63
|
+
add = {add: {index: new_internal_name, alias: index_name}}
|
64
|
+
client.indices.update_aliases body: {actions: [remove, add].compact}
|
65
|
+
made_it = true
|
66
|
+
ensure
|
67
|
+
if made_it
|
68
|
+
client.indices.delete(index: old_internal_name) if old_internal_name
|
69
|
+
else
|
70
|
+
client.indices.delete(index: new_internal_name) if new_internal_name
|
71
|
+
end
|
72
|
+
end
|
64
73
|
end
|
65
74
|
|
66
75
|
def destroy
|
data/lib/syphon/railtie.rb
CHANGED
@@ -33,7 +33,7 @@ module Syphon
|
|
33
33
|
config ||= {}
|
34
34
|
config.symbolize_keys!
|
35
35
|
config[:log] = normalize_log(env, root, config[:log])
|
36
|
-
config[:database] ||= dbconfig[env].dup
|
36
|
+
config[:database] ||= (dbconfig["#{env}_syphon"] || dbconfig[env]).dup
|
37
37
|
config[:index_namespace] ||= "#{app_name.underscore}_#{env}"
|
38
38
|
config[:database].try(:symbolize_keys!)
|
39
39
|
config[:elasticsearch].try(:symbolize_keys!)
|
data/lib/syphon/schema.rb
CHANGED
@@ -90,10 +90,15 @@ module Syphon
|
|
90
90
|
@type = type
|
91
91
|
@expression = expression
|
92
92
|
@properties = options.merge(type: type)
|
93
|
+
@multi = options[:multi]
|
93
94
|
end
|
94
95
|
|
95
96
|
attr_reader :schema, :name, :type, :expression, :properties
|
96
97
|
|
98
|
+
def multi?
|
99
|
+
@multi
|
100
|
+
end
|
101
|
+
|
97
102
|
def select(outer = nil)
|
98
103
|
name = outer ? "#{outer}[#{self.name}]" : self.name
|
99
104
|
"#{schema.send(:query_fragment, expression)} AS `#{name}`"
|
data/lib/syphon/version.rb
CHANGED
data/syphon.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
16
16
|
|
17
17
|
gem.add_dependency 'elasticsearch', '~> 0.4.0'
|
18
|
-
gem.add_dependency 'activesupport', '
|
18
|
+
gem.add_dependency 'activesupport', '< 5'
|
19
19
|
gem.add_dependency 'mysql2', '~> 0.3.12'
|
20
20
|
|
21
21
|
gem.add_development_dependency 'bundler'
|
data/test/syphon/test_builder.rb
CHANGED
@@ -30,37 +30,43 @@ describe Syphon::Builder do
|
|
30
30
|
]
|
31
31
|
end
|
32
32
|
|
33
|
-
it "
|
33
|
+
it "replaces content from subsequent rows with the same root id for singular fields" do
|
34
34
|
schema = Syphon::Schema.new do
|
35
35
|
integer :id, 0
|
36
36
|
string :name, 'x'
|
37
37
|
end
|
38
38
|
results = [[1, 'one'], [1, 'two']]
|
39
|
+
Syphon::Builder.new(results, schema).to_a.
|
40
|
+
must_equal [{id: 1, name: 'two'}]
|
41
|
+
end
|
42
|
+
|
43
|
+
it "combines content from subsequent rows with the same root id for multi fields" do
|
44
|
+
schema = Syphon::Schema.new do
|
45
|
+
integer :id, 0
|
46
|
+
string :name, 'x', multi: true
|
47
|
+
end
|
48
|
+
results = [[1, 'one'], [1, 'two']]
|
39
49
|
Syphon::Builder.new(results, schema).to_a.
|
40
50
|
must_equal [{id: 1, name: ['one', 'two']}]
|
41
51
|
end
|
42
52
|
|
43
|
-
it "
|
53
|
+
it "replaces content from subsequent rows for singular nested fields" do
|
44
54
|
schema = Syphon::Schema.new do
|
45
55
|
integer :id, 0
|
46
56
|
nested :nested1 do
|
47
|
-
integer :
|
48
|
-
|
49
|
-
nested :nested2 do
|
50
|
-
integer :a, 'x'
|
57
|
+
integer :id, 0
|
58
|
+
integer :name, 'x'
|
51
59
|
end
|
52
60
|
end
|
53
|
-
results = [[1,
|
54
|
-
Syphon::Builder.new(results, schema).to_a.
|
55
|
-
{id: 1, nested1: {
|
56
|
-
{id: 2, nested1: {a: 20}, nested2: {a: 21}},
|
57
|
-
]
|
61
|
+
results = [[1, 2, 'a'], [1, 3, 'b']]
|
62
|
+
Syphon::Builder.new(results, schema).to_a.
|
63
|
+
must_equal [{id: 1, nested1: {id: 3, name: 'b'}}]
|
58
64
|
end
|
59
65
|
|
60
|
-
it "
|
66
|
+
it "replaces content from subsequent rows for multi nested fields" do
|
61
67
|
schema = Syphon::Schema.new do
|
62
68
|
integer :id, 0
|
63
|
-
nested :nested1 do
|
69
|
+
nested :nested1, multi: true do
|
64
70
|
integer :id, 0
|
65
71
|
integer :name, 'x'
|
66
72
|
end
|
data/test/syphon/test_index.rb
CHANGED
@@ -106,6 +106,13 @@ describe Syphon::Index do
|
|
106
106
|
hits.map { |doc| doc['_source']['login'] }.must_equal ['bob']
|
107
107
|
end
|
108
108
|
|
109
|
+
it "deletes the index if building fails" do
|
110
|
+
indices = TestIndex.client.indices.status['indices'].keys.to_set
|
111
|
+
TestIndex.source.instance_eval { def import(*); raise 'fubar'; end }
|
112
|
+
-> { TestIndex.build }.must_raise(RuntimeError)
|
113
|
+
TestIndex.client.indices.status['indices'].keys.to_set.must_equal indices
|
114
|
+
end
|
115
|
+
|
109
116
|
it "passes configured index settings" do
|
110
117
|
TestIndex.index_settings = {number_of_shards: 23}
|
111
118
|
TestIndex.build
|
data/test/syphon/test_railtie.rb
CHANGED
@@ -81,7 +81,14 @@ describe Syphon::Railtie do
|
|
81
81
|
Syphon.database_configuration.must_equal({database: 'mydb'})
|
82
82
|
end
|
83
83
|
|
84
|
-
it "defaults to the
|
84
|
+
it "defaults to a configuration for syphon in the current environment" do
|
85
|
+
write_config('test' => {})
|
86
|
+
params[:dbconfig] = {'test_syphon' => {database: 'syphondb'}, 'test' => {database: 'ardb'}}
|
87
|
+
Syphon::Railtie.set_configuration(params)
|
88
|
+
Syphon.database_configuration.must_equal({database: 'syphondb'})
|
89
|
+
end
|
90
|
+
|
91
|
+
it "defaults to the primary ActiveRecord configuration otherwise" do
|
85
92
|
write_config('test' => {})
|
86
93
|
Syphon::Railtie.set_configuration(params)
|
87
94
|
Syphon.database_configuration.must_equal({database: 'ardb'})
|
data/test/test_helper.rb
CHANGED
@@ -5,7 +5,7 @@ require 'minitest/spec'
|
|
5
5
|
require 'yaml'
|
6
6
|
require 'fileutils'
|
7
7
|
require 'temporaries'
|
8
|
-
require '
|
8
|
+
require 'byebug'
|
9
9
|
require 'looksee'
|
10
10
|
require 'rails'
|
11
11
|
|
@@ -20,21 +20,29 @@ MiniTest::Spec.class_eval do
|
|
20
20
|
def self.uses_users_table
|
21
21
|
let(:db) { Syphon.database_connection }
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
db.query "CREATE TABLE IF NOT EXISTS users(#{columns})"
|
26
|
-
end
|
23
|
+
include Module.new {
|
24
|
+
extend MiniTest::Spec::DSL
|
27
25
|
|
28
|
-
|
29
|
-
|
30
|
-
|
26
|
+
before do
|
27
|
+
columns = "id int auto_increment PRIMARY KEY, login VARCHAR(20)"
|
28
|
+
db.query "CREATE TABLE IF NOT EXISTS users(#{columns})"
|
29
|
+
end
|
30
|
+
|
31
|
+
after do
|
32
|
+
db.query "DROP TABLE IF EXISTS users"
|
33
|
+
end
|
34
|
+
}
|
31
35
|
end
|
32
36
|
|
33
37
|
def self.uses_elasticsearch
|
34
38
|
let(:client) { Syphon.client }
|
35
39
|
|
36
|
-
|
37
|
-
|
40
|
+
include Module.new {
|
41
|
+
extend MiniTest::Spec::DSL
|
42
|
+
|
43
|
+
before { clear_indices }
|
44
|
+
after { clear_indices }
|
45
|
+
}
|
38
46
|
end
|
39
47
|
|
40
48
|
def clear_indices
|
metadata
CHANGED
@@ -1,78 +1,69 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: syphon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- George Ogata
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-03-18 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: elasticsearch
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 0.4.0
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 0.4.0
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: activesupport
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - "<"
|
36
32
|
- !ruby/object:Gem::Version
|
37
|
-
version:
|
33
|
+
version: '5'
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - "<"
|
44
39
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
40
|
+
version: '5'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: mysql2
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- - ~>
|
45
|
+
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: 0.3.12
|
54
48
|
type: :runtime
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- - ~>
|
52
|
+
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: 0.3.12
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: bundler
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- -
|
59
|
+
- - ">="
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: '0'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- -
|
66
|
+
- - ">="
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0'
|
78
69
|
description: Syphon data from an Arel source into ElasticSearch
|
@@ -82,7 +73,7 @@ executables: []
|
|
82
73
|
extensions: []
|
83
74
|
extra_rdoc_files: []
|
84
75
|
files:
|
85
|
-
- .gitignore
|
76
|
+
- ".gitignore"
|
86
77
|
- CHANGELOG
|
87
78
|
- Gemfile
|
88
79
|
- LICENSE
|
@@ -107,33 +98,26 @@ files:
|
|
107
98
|
- test/test_syphon.rb
|
108
99
|
homepage: https://github.com/howaboutwe/syphon
|
109
100
|
licenses: []
|
101
|
+
metadata: {}
|
110
102
|
post_install_message:
|
111
103
|
rdoc_options: []
|
112
104
|
require_paths:
|
113
105
|
- lib
|
114
106
|
required_ruby_version: !ruby/object:Gem::Requirement
|
115
|
-
none: false
|
116
107
|
requirements:
|
117
|
-
- -
|
108
|
+
- - ">="
|
118
109
|
- !ruby/object:Gem::Version
|
119
110
|
version: '0'
|
120
|
-
segments:
|
121
|
-
- 0
|
122
|
-
hash: -2157484304572429612
|
123
111
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
|
-
none: false
|
125
112
|
requirements:
|
126
|
-
- -
|
113
|
+
- - ">="
|
127
114
|
- !ruby/object:Gem::Version
|
128
115
|
version: '0'
|
129
|
-
segments:
|
130
|
-
- 0
|
131
|
-
hash: -2157484304572429612
|
132
116
|
requirements: []
|
133
117
|
rubyforge_project:
|
134
|
-
rubygems_version:
|
118
|
+
rubygems_version: 2.2.2
|
135
119
|
signing_key:
|
136
|
-
specification_version:
|
120
|
+
specification_version: 4
|
137
121
|
summary: Syphon data from an Arel source into ElasticSearch
|
138
122
|
test_files:
|
139
123
|
- test/config.yml.sample
|