syphon 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f6b4117676c9b04cf839c499c0e1923b39defb30
4
+ data.tar.gz: c95e03409f0b86a8aa6c404f8ba1583a1e033758
5
+ SHA512:
6
+ metadata.gz: 7b68fea96fc36d64e29698a4dd217bbadb20d8b6369de324a4f602aee900cb76f568afb312999ef1808c863379c0e541fadfa8eaa7650ece627dc433b43281dc
7
+ data.tar.gz: 472a520467282278d21b6c8b79610cc6eb807fe5a204d4744de479b34290427de5b8c746c353fa5322bc3c3b4a41bffb50e7f5092249459a3ada0e16a735d468
data/.gitignore CHANGED
@@ -1,2 +1,3 @@
1
- Gemfile.lock
2
- test/config.yml
1
+ /.bundle
2
+ /Gemfile.lock
3
+ /test/config.yml
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ == 0.1.0 2014-03-18
2
+
3
+ * Clean up partially built index if building fails.
4
+ * With Rails, use the "<env>_syphon" database configuration in database.yml if
5
+ present, before falling back to the standard "<env>" one.
6
+ * Support ActiveSupport 4.
7
+ * Add :multi option for fields. This causes values to be indexed as arrays.
8
+ Arrays are no longer autodetected - fields which may be multivalued must be
9
+ declared as such, and will always be returned as an array by
10
+ ElasticSearch >= 1.0.
11
+
1
12
  == 0.0.2 2013-12-12
2
13
 
3
14
  * Add index_settings attribute to control sharding, replication, etc.
data/Gemfile CHANGED
@@ -1,8 +1,10 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
3
 
4
- gem 'debugger', '~> 1.6.0', platform: :ruby_19
5
- gem 'looksee', '~> 1.1.0'
4
+ gem 'debugger', platform: :ruby_19
5
+ gem 'byebug', platform: [:ruby_20, :ruby_21]
6
+ gem 'minitest', '< 5'
7
+ gem 'looksee'
6
8
  gem 'rails'
7
9
  gem 'ritual', '~> 0.4.0'
8
- gem 'temporaries', '~> 0.3.0'
10
+ gem 'temporaries'
data/README.markdown CHANGED
@@ -2,65 +2,4 @@
2
2
 
3
3
  Syphon data from an Arel source into ElasticSearch.
4
4
 
5
- ## Installation
6
-
7
- gem install syphon
8
-
9
- ## Usage
10
-
11
- class UsersIndex
12
- include Syphon::Index
13
-
14
- define_source do
15
- # Define some attributes. Options like "index: :not_analyzed" are passed
16
- # to elasticsearch.
17
-
18
- string :login, index: :not_analyzed
19
- string :name
20
- integer :age
21
- geo_point :location
22
- string :bio
23
- nested_documents do
24
-
25
- end
26
- end
27
- end
28
-
29
- ## Why?
30
-
31
- In 2 words: fast indexing.
32
-
33
- There is no shortage of ElasticSearch adapters for ruby. The issue is that they
34
- typically work by defining fields as attributes (methods) on your model
35
- instances, which means to index a lot of data, you need to roundtrip your data
36
- through heavy ruby models, which can be quite slow, particularly if your models
37
- are made of ActiveRecord.
38
-
39
- Syphon makes no such assumption. You define your fields to index using SQL
40
- expressions. These are assembled into a single SQL query, and the resulting rows
41
- are used to build ElasticSearch documents in an intuitive way using minimal ruby
42
- data structures. Those coming from ThinkingSphinx might find this notion -- and
43
- syntax -- familiar.
44
-
45
- Syphon is similar in concept to an ElasticSearch river, except it's completely
46
- handled off the server. In fact it was originally implemented using the
47
- [JDBC river][jdbc-river], but the current incarnation was found to be a little
48
- lacking in functionality for a smooth integration (such as documents with
49
- multiple nested fields, and synchronous updates for testing).
50
-
51
- Syphon focuses on the indexing aspect only - you are free to choose another gem
52
- for the query DSL.
53
-
54
- [jdbc-river]: https://github.com/jprante/elasticsearch-river-jdbc
55
-
56
- ## Contributing
57
-
58
- * [Bug reports](https://github.com/howaboutwe/syphon/issues)
59
- * [Source](https://github.com/howaboutwe/syphon)
60
- * Patches: Fork on Github, send pull request.
61
- * Include tests where practical.
62
- * Leave the version alone, or bump it in a separate commit.
63
-
64
- ## Copyright
65
-
66
- Copyright (c) George Ogata. See LICENSE for details.
5
+ ### Work in progress
@@ -26,14 +26,17 @@ module Syphon
26
26
  def add_to_document(document, row, schema = self.schema, index = 0)
27
27
  schema.fields.each do |name, field|
28
28
  if field.is_a?(Schema::NestedField)
29
- nested_doc = {}
30
- index = add_to_document(nested_doc, row, field.nested_schema, index)
31
- document[field.name] = combine(document[field.name], nested_doc)
32
- index
29
+ value = {}
30
+ index = add_to_document(value, row, field.nested_schema, index)
33
31
  else
34
- document[field.name] = combine(document[field.name], row[index])
32
+ value = row[index]
35
33
  index += 1
36
34
  end
35
+ if field.multi?
36
+ (document[field.name] ||= []) << value
37
+ else
38
+ document[field.name] = value
39
+ end
37
40
  end
38
41
  index
39
42
  end
data/lib/syphon/index.rb CHANGED
@@ -48,19 +48,28 @@ module Syphon
48
48
  old_internal_name = internal_index_name
49
49
  new_internal_name = new_internal_index_name(index_name)
50
50
 
51
- client.indices.create(index: new_internal_name, body: {settings: index_settings})
52
- sources.each do |name, source|
53
- body = source.mapping
54
- client.indices.put_mapping(index: new_internal_name, type: source.type, body: body)
55
- source.import(index: new_internal_name) unless options[:schema_only]
56
- end
57
-
58
- warmups.each { |w| w.call(new_internal_name) }
51
+ made_it = false
52
+ begin
53
+ client.indices.create(index: new_internal_name, body: {settings: index_settings})
54
+ sources.each do |name, source|
55
+ body = source.mapping
56
+ client.indices.put_mapping(index: new_internal_name, type: source.type, body: body)
57
+ source.import(index: new_internal_name) unless options[:schema_only]
58
+ end
59
59
 
60
- remove = {remove: {index: old_internal_name, alias: index_name}} if old_internal_name
61
- add = {add: {index: new_internal_name, alias: index_name}}
62
- client.indices.update_aliases body: {actions: [remove, add].compact}
63
- client.indices.delete(index: old_internal_name) if old_internal_name
60
+ warmups.each { |w| w.call(new_internal_name) }
61
+
62
+ remove = {remove: {index: old_internal_name, alias: index_name}} if old_internal_name
63
+ add = {add: {index: new_internal_name, alias: index_name}}
64
+ client.indices.update_aliases body: {actions: [remove, add].compact}
65
+ made_it = true
66
+ ensure
67
+ if made_it
68
+ client.indices.delete(index: old_internal_name) if old_internal_name
69
+ else
70
+ client.indices.delete(index: new_internal_name) if new_internal_name
71
+ end
72
+ end
64
73
  end
65
74
 
66
75
  def destroy
@@ -33,7 +33,7 @@ module Syphon
33
33
  config ||= {}
34
34
  config.symbolize_keys!
35
35
  config[:log] = normalize_log(env, root, config[:log])
36
- config[:database] ||= dbconfig[env].dup
36
+ config[:database] ||= (dbconfig["#{env}_syphon"] || dbconfig[env]).dup
37
37
  config[:index_namespace] ||= "#{app_name.underscore}_#{env}"
38
38
  config[:database].try(:symbolize_keys!)
39
39
  config[:elasticsearch].try(:symbolize_keys!)
data/lib/syphon/schema.rb CHANGED
@@ -90,10 +90,15 @@ module Syphon
90
90
  @type = type
91
91
  @expression = expression
92
92
  @properties = options.merge(type: type)
93
+ @multi = options[:multi]
93
94
  end
94
95
 
95
96
  attr_reader :schema, :name, :type, :expression, :properties
96
97
 
98
+ def multi?
99
+ @multi
100
+ end
101
+
97
102
  def select(outer = nil)
98
103
  name = outer ? "#{outer}[#{self.name}]" : self.name
99
104
  "#{schema.send(:query_fragment, expression)} AS `#{name}`"
@@ -1,5 +1,5 @@
1
1
  module Syphon
2
- VERSION = [0, 0, 2]
2
+ VERSION = [0, 1, 0]
3
3
 
4
4
  class << VERSION
5
5
  include Comparable
data/syphon.gemspec CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
16
 
17
17
  gem.add_dependency 'elasticsearch', '~> 0.4.0'
18
- gem.add_dependency 'activesupport', '~> 3.2.0'
18
+ gem.add_dependency 'activesupport', '< 5'
19
19
  gem.add_dependency 'mysql2', '~> 0.3.12'
20
20
 
21
21
  gem.add_development_dependency 'bundler'
@@ -30,37 +30,43 @@ describe Syphon::Builder do
30
30
  ]
31
31
  end
32
32
 
33
- it "merges content from rows with the same root id" do
33
+ it "replaces content from subsequent rows with the same root id for singular fields" do
34
34
  schema = Syphon::Schema.new do
35
35
  integer :id, 0
36
36
  string :name, 'x'
37
37
  end
38
38
  results = [[1, 'one'], [1, 'two']]
39
+ Syphon::Builder.new(results, schema).to_a.
40
+ must_equal [{id: 1, name: 'two'}]
41
+ end
42
+
43
+ it "combines content from subsequent rows with the same root id for multi fields" do
44
+ schema = Syphon::Schema.new do
45
+ integer :id, 0
46
+ string :name, 'x', multi: true
47
+ end
48
+ results = [[1, 'one'], [1, 'two']]
39
49
  Syphon::Builder.new(results, schema).to_a.
40
50
  must_equal [{id: 1, name: ['one', 'two']}]
41
51
  end
42
52
 
43
- it "merges content with the same root id correctly when there are nested fields" do
53
+ it "replaces content from subsequent rows for singular nested fields" do
44
54
  schema = Syphon::Schema.new do
45
55
  integer :id, 0
46
56
  nested :nested1 do
47
- integer :a, 'x'
48
- end
49
- nested :nested2 do
50
- integer :a, 'x'
57
+ integer :id, 0
58
+ integer :name, 'x'
51
59
  end
52
60
  end
53
- results = [[1, 10, 11], [2, 20, 21]]
54
- Syphon::Builder.new(results, schema).to_a.must_equal [
55
- {id: 1, nested1: {a: 10}, nested2: {a: 11}},
56
- {id: 2, nested1: {a: 20}, nested2: {a: 21}},
57
- ]
61
+ results = [[1, 2, 'a'], [1, 3, 'b']]
62
+ Syphon::Builder.new(results, schema).to_a.
63
+ must_equal [{id: 1, nested1: {id: 3, name: 'b'}}]
58
64
  end
59
65
 
60
- it "supports arrays as nested fields" do
66
+ it "replaces content from subsequent rows for multi nested fields" do
61
67
  schema = Syphon::Schema.new do
62
68
  integer :id, 0
63
- nested :nested1 do
69
+ nested :nested1, multi: true do
64
70
  integer :id, 0
65
71
  integer :name, 'x'
66
72
  end
@@ -106,6 +106,13 @@ describe Syphon::Index do
106
106
  hits.map { |doc| doc['_source']['login'] }.must_equal ['bob']
107
107
  end
108
108
 
109
+ it "deletes the index if building fails" do
110
+ indices = TestIndex.client.indices.status['indices'].keys.to_set
111
+ TestIndex.source.instance_eval { def import(*); raise 'fubar'; end }
112
+ -> { TestIndex.build }.must_raise(RuntimeError)
113
+ TestIndex.client.indices.status['indices'].keys.to_set.must_equal indices
114
+ end
115
+
109
116
  it "passes configured index settings" do
110
117
  TestIndex.index_settings = {number_of_shards: 23}
111
118
  TestIndex.build
@@ -81,7 +81,14 @@ describe Syphon::Railtie do
81
81
  Syphon.database_configuration.must_equal({database: 'mydb'})
82
82
  end
83
83
 
84
- it "defaults to the primary ActiveRecord configuration" do
84
+ it "defaults to a configuration for syphon in the current environment" do
85
+ write_config('test' => {})
86
+ params[:dbconfig] = {'test_syphon' => {database: 'syphondb'}, 'test' => {database: 'ardb'}}
87
+ Syphon::Railtie.set_configuration(params)
88
+ Syphon.database_configuration.must_equal({database: 'syphondb'})
89
+ end
90
+
91
+ it "defaults to the primary ActiveRecord configuration otherwise" do
85
92
  write_config('test' => {})
86
93
  Syphon::Railtie.set_configuration(params)
87
94
  Syphon.database_configuration.must_equal({database: 'ardb'})
data/test/test_helper.rb CHANGED
@@ -5,7 +5,7 @@ require 'minitest/spec'
5
5
  require 'yaml'
6
6
  require 'fileutils'
7
7
  require 'temporaries'
8
- require 'debugger' if RUBY_VERSION < '2.0'
8
+ require 'byebug'
9
9
  require 'looksee'
10
10
  require 'rails'
11
11
 
@@ -20,21 +20,29 @@ MiniTest::Spec.class_eval do
20
20
  def self.uses_users_table
21
21
  let(:db) { Syphon.database_connection }
22
22
 
23
- before do
24
- columns = "id int auto_increment PRIMARY KEY, login VARCHAR(20)"
25
- db.query "CREATE TABLE IF NOT EXISTS users(#{columns})"
26
- end
23
+ include Module.new {
24
+ extend MiniTest::Spec::DSL
27
25
 
28
- after do
29
- db.query "DROP TABLE IF EXISTS users"
30
- end
26
+ before do
27
+ columns = "id int auto_increment PRIMARY KEY, login VARCHAR(20)"
28
+ db.query "CREATE TABLE IF NOT EXISTS users(#{columns})"
29
+ end
30
+
31
+ after do
32
+ db.query "DROP TABLE IF EXISTS users"
33
+ end
34
+ }
31
35
  end
32
36
 
33
37
  def self.uses_elasticsearch
34
38
  let(:client) { Syphon.client }
35
39
 
36
- before { clear_indices }
37
- after { clear_indices }
40
+ include Module.new {
41
+ extend MiniTest::Spec::DSL
42
+
43
+ before { clear_indices }
44
+ after { clear_indices }
45
+ }
38
46
  end
39
47
 
40
48
  def clear_indices
metadata CHANGED
@@ -1,78 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: syphon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - George Ogata
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-12-12 00:00:00.000000000 Z
11
+ date: 2014-03-18 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: elasticsearch
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: 0.4.0
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ~>
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: 0.4.0
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: activesupport
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ~>
31
+ - - "<"
36
32
  - !ruby/object:Gem::Version
37
- version: 3.2.0
33
+ version: '5'
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ~>
38
+ - - "<"
44
39
  - !ruby/object:Gem::Version
45
- version: 3.2.0
40
+ version: '5'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: mysql2
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ~>
45
+ - - "~>"
52
46
  - !ruby/object:Gem::Version
53
47
  version: 0.3.12
54
48
  type: :runtime
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ~>
52
+ - - "~>"
60
53
  - !ruby/object:Gem::Version
61
54
  version: 0.3.12
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: bundler
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ! '>='
59
+ - - ">="
68
60
  - !ruby/object:Gem::Version
69
61
  version: '0'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ! '>='
66
+ - - ">="
76
67
  - !ruby/object:Gem::Version
77
68
  version: '0'
78
69
  description: Syphon data from an Arel source into ElasticSearch
@@ -82,7 +73,7 @@ executables: []
82
73
  extensions: []
83
74
  extra_rdoc_files: []
84
75
  files:
85
- - .gitignore
76
+ - ".gitignore"
86
77
  - CHANGELOG
87
78
  - Gemfile
88
79
  - LICENSE
@@ -107,33 +98,26 @@ files:
107
98
  - test/test_syphon.rb
108
99
  homepage: https://github.com/howaboutwe/syphon
109
100
  licenses: []
101
+ metadata: {}
110
102
  post_install_message:
111
103
  rdoc_options: []
112
104
  require_paths:
113
105
  - lib
114
106
  required_ruby_version: !ruby/object:Gem::Requirement
115
- none: false
116
107
  requirements:
117
- - - ! '>='
108
+ - - ">="
118
109
  - !ruby/object:Gem::Version
119
110
  version: '0'
120
- segments:
121
- - 0
122
- hash: -2157484304572429612
123
111
  required_rubygems_version: !ruby/object:Gem::Requirement
124
- none: false
125
112
  requirements:
126
- - - ! '>='
113
+ - - ">="
127
114
  - !ruby/object:Gem::Version
128
115
  version: '0'
129
- segments:
130
- - 0
131
- hash: -2157484304572429612
132
116
  requirements: []
133
117
  rubyforge_project:
134
- rubygems_version: 1.8.25
118
+ rubygems_version: 2.2.2
135
119
  signing_key:
136
- specification_version: 3
120
+ specification_version: 4
137
121
  summary: Syphon data from an Arel source into ElasticSearch
138
122
  test_files:
139
123
  - test/config.yml.sample