syphon 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f6b4117676c9b04cf839c499c0e1923b39defb30
4
+ data.tar.gz: c95e03409f0b86a8aa6c404f8ba1583a1e033758
5
+ SHA512:
6
+ metadata.gz: 7b68fea96fc36d64e29698a4dd217bbadb20d8b6369de324a4f602aee900cb76f568afb312999ef1808c863379c0e541fadfa8eaa7650ece627dc433b43281dc
7
+ data.tar.gz: 472a520467282278d21b6c8b79610cc6eb807fe5a204d4744de479b34290427de5b8c746c353fa5322bc3c3b4a41bffb50e7f5092249459a3ada0e16a735d468
data/.gitignore CHANGED
@@ -1,2 +1,3 @@
1
- Gemfile.lock
2
- test/config.yml
1
+ /.bundle
2
+ /Gemfile.lock
3
+ /test/config.yml
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ == 0.1.0 2014-03-18
2
+
3
+ * Clean up partially built index if building fails.
4
+ * Use an ENV_syphon database configuration in database.yml before falling back
5
+ to the standard one with Rails.
6
+ * Support ActiveSupport 4.
7
+ * Add :multi option for fields. This causes values to be indexed as arrays.
8
+ Arrays are no longer autodetected - fields which may be multivalued must be
9
+ declared as such, and will always be returned from ElasticSearch as an array
10
+ in ElasticSearch >= 1.0.
11
+
1
12
  == 0.0.2 2013-12-12
2
13
 
3
14
  * Add index_settings attribute to control sharding, replication, etc.
data/Gemfile CHANGED
@@ -1,8 +1,10 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
3
 
4
- gem 'debugger', '~> 1.6.0', platform: :ruby_19
5
- gem 'looksee', '~> 1.1.0'
4
+ gem 'debugger', platform: :ruby_19
5
+ gem 'byebug', platform: [:ruby_20, :ruby_21]
6
+ gem 'minitest', '< 5'
7
+ gem 'looksee'
6
8
  gem 'rails'
7
9
  gem 'ritual', '~> 0.4.0'
8
- gem 'temporaries', '~> 0.3.0'
10
+ gem 'temporaries'
data/README.markdown CHANGED
@@ -2,65 +2,4 @@
2
2
 
3
3
  Syphon data from an Arel source into ElasticSearch.
4
4
 
5
- ## Installation
6
-
7
- gem install syphon
8
-
9
- ## Usage
10
-
11
- class UsersIndex
12
- include Syphon::Index
13
-
14
- define_source do
15
- # Define some attributes. Options like "index: :not_analyzed" are passed
16
- # to elasticsearch.
17
-
18
- string :login, index: :not_analyzed
19
- string :name
20
- integer :age
21
- geo_point :location
22
- string :bio
23
- nested_documents do
24
-
25
- end
26
- end
27
- end
28
-
29
- ## Why?
30
-
31
- In 2 words: fast indexing.
32
-
33
- There is no shortage of ElasticSearch adapters for ruby. The issue is that they
34
- typically work by defining fields as attributes (methods) on your model
35
- instances, which means to index a lot of data, you need to roundtrip your data
36
- through heavy ruby models, which can be quite slow, particularly if your models
37
- are made of ActiveRecord.
38
-
39
- Syphon makes no such assumption. You define your fields to index using SQL
40
- expressions. These are assembled into a single SQL query, and the resulting rows
41
- are used to build ElasticSearch documents in an intuitive way using minimal ruby
42
- data structures. Those coming from ThinkingSphinx might find this notion -- and
43
- syntax -- familiar.
44
-
45
- Syphon is similar in concept to an ElasticSearch river, except it's completely
46
- handled off the server. In fact it was originally implemented using the
47
- [JDBC river][jdbc-river], but the current incarnation was found to be a little
48
- lacking in functionality for a smooth integration (such as documents with
49
- multiple nested fields, and synchronous updates for testing).
50
-
51
- Syphon focuses on the indexing aspect only - you are free to choose another gem
52
- for the query DSL.
53
-
54
- [jdbc-river]: https://github.com/jprante/elasticsearch-river-jdbc
55
-
56
- ## Contributing
57
-
58
- * [Bug reports](https://github.com/howaboutwe/syphon/issues)
59
- * [Source](https://github.com/howaboutwe/syphon)
60
- * Patches: Fork on Github, send pull request.
61
- * Include tests where practical.
62
- * Leave the version alone, or bump it in a separate commit.
63
-
64
- ## Copyright
65
-
66
- Copyright (c) George Ogata. See LICENSE for details.
5
+ ### Work in progress
data/lib/syphon/builder.rb CHANGED
@@ -26,14 +26,17 @@ module Syphon
26
26
  def add_to_document(document, row, schema = self.schema, index = 0)
27
27
  schema.fields.each do |name, field|
28
28
  if field.is_a?(Schema::NestedField)
29
- nested_doc = {}
30
- index = add_to_document(nested_doc, row, field.nested_schema, index)
31
- document[field.name] = combine(document[field.name], nested_doc)
32
- index
29
+ value = {}
30
+ index = add_to_document(value, row, field.nested_schema, index)
33
31
  else
34
- document[field.name] = combine(document[field.name], row[index])
32
+ value = row[index]
35
33
  index += 1
36
34
  end
35
+ if field.multi?
36
+ (document[field.name] ||= []) << value
37
+ else
38
+ document[field.name] = value
39
+ end
37
40
  end
38
41
  index
39
42
  end
data/lib/syphon/index.rb CHANGED
@@ -48,19 +48,28 @@ module Syphon
48
48
  old_internal_name = internal_index_name
49
49
  new_internal_name = new_internal_index_name(index_name)
50
50
 
51
- client.indices.create(index: new_internal_name, body: {settings: index_settings})
52
- sources.each do |name, source|
53
- body = source.mapping
54
- client.indices.put_mapping(index: new_internal_name, type: source.type, body: body)
55
- source.import(index: new_internal_name) unless options[:schema_only]
56
- end
57
-
58
- warmups.each { |w| w.call(new_internal_name) }
51
+ made_it = false
52
+ begin
53
+ client.indices.create(index: new_internal_name, body: {settings: index_settings})
54
+ sources.each do |name, source|
55
+ body = source.mapping
56
+ client.indices.put_mapping(index: new_internal_name, type: source.type, body: body)
57
+ source.import(index: new_internal_name) unless options[:schema_only]
58
+ end
59
59
 
60
- remove = {remove: {index: old_internal_name, alias: index_name}} if old_internal_name
61
- add = {add: {index: new_internal_name, alias: index_name}}
62
- client.indices.update_aliases body: {actions: [remove, add].compact}
63
- client.indices.delete(index: old_internal_name) if old_internal_name
60
+ warmups.each { |w| w.call(new_internal_name) }
61
+
62
+ remove = {remove: {index: old_internal_name, alias: index_name}} if old_internal_name
63
+ add = {add: {index: new_internal_name, alias: index_name}}
64
+ client.indices.update_aliases body: {actions: [remove, add].compact}
65
+ made_it = true
66
+ ensure
67
+ if made_it
68
+ client.indices.delete(index: old_internal_name) if old_internal_name
69
+ else
70
+ client.indices.delete(index: new_internal_name) if new_internal_name
71
+ end
72
+ end
64
73
  end
65
74
 
66
75
  def destroy
data/lib/syphon/railtie.rb CHANGED
@@ -33,7 +33,7 @@ module Syphon
33
33
  config ||= {}
34
34
  config.symbolize_keys!
35
35
  config[:log] = normalize_log(env, root, config[:log])
36
- config[:database] ||= dbconfig[env].dup
36
+ config[:database] ||= (dbconfig["#{env}_syphon"] || dbconfig[env]).dup
37
37
  config[:index_namespace] ||= "#{app_name.underscore}_#{env}"
38
38
  config[:database].try(:symbolize_keys!)
39
39
  config[:elasticsearch].try(:symbolize_keys!)
data/lib/syphon/schema.rb CHANGED
@@ -90,10 +90,15 @@ module Syphon
90
90
  @type = type
91
91
  @expression = expression
92
92
  @properties = options.merge(type: type)
93
+ @multi = options[:multi]
93
94
  end
94
95
 
95
96
  attr_reader :schema, :name, :type, :expression, :properties
96
97
 
98
+ def multi?
99
+ @multi
100
+ end
101
+
97
102
  def select(outer = nil)
98
103
  name = outer ? "#{outer}[#{self.name}]" : self.name
99
104
  "#{schema.send(:query_fragment, expression)} AS `#{name}`"
data/lib/syphon/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Syphon
2
- VERSION = [0, 0, 2]
2
+ VERSION = [0, 1, 0]
3
3
 
4
4
  class << VERSION
5
5
  include Comparable
data/syphon.gemspec CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
16
 
17
17
  gem.add_dependency 'elasticsearch', '~> 0.4.0'
18
- gem.add_dependency 'activesupport', '~> 3.2.0'
18
+ gem.add_dependency 'activesupport', '< 5'
19
19
  gem.add_dependency 'mysql2', '~> 0.3.12'
20
20
 
21
21
  gem.add_development_dependency 'bundler'
@@ -30,37 +30,43 @@ describe Syphon::Builder do
30
30
  ]
31
31
  end
32
32
 
33
- it "merges content from rows with the same root id" do
33
+ it "replaces content from subsequent rows with the same root id for singular fields" do
34
34
  schema = Syphon::Schema.new do
35
35
  integer :id, 0
36
36
  string :name, 'x'
37
37
  end
38
38
  results = [[1, 'one'], [1, 'two']]
39
+ Syphon::Builder.new(results, schema).to_a.
40
+ must_equal [{id: 1, name: 'two'}]
41
+ end
42
+
43
+ it "combines content from subsequent rows with the same root id for multi fields" do
44
+ schema = Syphon::Schema.new do
45
+ integer :id, 0
46
+ string :name, 'x', multi: true
47
+ end
48
+ results = [[1, 'one'], [1, 'two']]
39
49
  Syphon::Builder.new(results, schema).to_a.
40
50
  must_equal [{id: 1, name: ['one', 'two']}]
41
51
  end
42
52
 
43
- it "merges content with the same root id correctly when there are nested fields" do
53
+ it "replaces content from subsequent rows for singular nested fields" do
44
54
  schema = Syphon::Schema.new do
45
55
  integer :id, 0
46
56
  nested :nested1 do
47
- integer :a, 'x'
48
- end
49
- nested :nested2 do
50
- integer :a, 'x'
57
+ integer :id, 0
58
+ integer :name, 'x'
51
59
  end
52
60
  end
53
- results = [[1, 10, 11], [2, 20, 21]]
54
- Syphon::Builder.new(results, schema).to_a.must_equal [
55
- {id: 1, nested1: {a: 10}, nested2: {a: 11}},
56
- {id: 2, nested1: {a: 20}, nested2: {a: 21}},
57
- ]
61
+ results = [[1, 2, 'a'], [1, 3, 'b']]
62
+ Syphon::Builder.new(results, schema).to_a.
63
+ must_equal [{id: 1, nested1: {id: 3, name: 'b'}}]
58
64
  end
59
65
 
60
- it "supports arrays as nested fields" do
66
+ it "replaces content from subsequent rows for multi nested fields" do
61
67
  schema = Syphon::Schema.new do
62
68
  integer :id, 0
63
- nested :nested1 do
69
+ nested :nested1, multi: true do
64
70
  integer :id, 0
65
71
  integer :name, 'x'
66
72
  end
@@ -106,6 +106,13 @@ describe Syphon::Index do
106
106
  hits.map { |doc| doc['_source']['login'] }.must_equal ['bob']
107
107
  end
108
108
 
109
+ it "deletes the index if building fails" do
110
+ indices = TestIndex.client.indices.status['indices'].keys.to_set
111
+ TestIndex.source.instance_eval { def import(*); raise 'fubar'; end }
112
+ -> { TestIndex.build }.must_raise(RuntimeError)
113
+ TestIndex.client.indices.status['indices'].keys.to_set.must_equal indices
114
+ end
115
+
109
116
  it "passes configured index settings" do
110
117
  TestIndex.index_settings = {number_of_shards: 23}
111
118
  TestIndex.build
@@ -81,7 +81,14 @@ describe Syphon::Railtie do
81
81
  Syphon.database_configuration.must_equal({database: 'mydb'})
82
82
  end
83
83
 
84
- it "defaults to the primary ActiveRecord configuration" do
84
+ it "defaults to a configuration for syphon in the current environment" do
85
+ write_config('test' => {})
86
+ params[:dbconfig] = {'test_syphon' => {database: 'syphondb'}, 'test' => {database: 'ardb'}}
87
+ Syphon::Railtie.set_configuration(params)
88
+ Syphon.database_configuration.must_equal({database: 'syphondb'})
89
+ end
90
+
91
+ it "defaults to the primary ActiveRecord configuration otherwise" do
85
92
  write_config('test' => {})
86
93
  Syphon::Railtie.set_configuration(params)
87
94
  Syphon.database_configuration.must_equal({database: 'ardb'})
data/test/test_helper.rb CHANGED
@@ -5,7 +5,7 @@ require 'minitest/spec'
5
5
  require 'yaml'
6
6
  require 'fileutils'
7
7
  require 'temporaries'
8
- require 'debugger' if RUBY_VERSION < '2.0'
8
+ require 'byebug'
9
9
  require 'looksee'
10
10
  require 'rails'
11
11
 
@@ -20,21 +20,29 @@ MiniTest::Spec.class_eval do
20
20
  def self.uses_users_table
21
21
  let(:db) { Syphon.database_connection }
22
22
 
23
- before do
24
- columns = "id int auto_increment PRIMARY KEY, login VARCHAR(20)"
25
- db.query "CREATE TABLE IF NOT EXISTS users(#{columns})"
26
- end
23
+ include Module.new {
24
+ extend MiniTest::Spec::DSL
27
25
 
28
- after do
29
- db.query "DROP TABLE IF EXISTS users"
30
- end
26
+ before do
27
+ columns = "id int auto_increment PRIMARY KEY, login VARCHAR(20)"
28
+ db.query "CREATE TABLE IF NOT EXISTS users(#{columns})"
29
+ end
30
+
31
+ after do
32
+ db.query "DROP TABLE IF EXISTS users"
33
+ end
34
+ }
31
35
  end
32
36
 
33
37
  def self.uses_elasticsearch
34
38
  let(:client) { Syphon.client }
35
39
 
36
- before { clear_indices }
37
- after { clear_indices }
40
+ include Module.new {
41
+ extend MiniTest::Spec::DSL
42
+
43
+ before { clear_indices }
44
+ after { clear_indices }
45
+ }
38
46
  end
39
47
 
40
48
  def clear_indices
metadata CHANGED
@@ -1,78 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: syphon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - George Ogata
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-12-12 00:00:00.000000000 Z
11
+ date: 2014-03-18 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: elasticsearch
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: 0.4.0
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ~>
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: 0.4.0
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: activesupport
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ~>
31
+ - - "<"
36
32
  - !ruby/object:Gem::Version
37
- version: 3.2.0
33
+ version: '5'
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ~>
38
+ - - "<"
44
39
  - !ruby/object:Gem::Version
45
- version: 3.2.0
40
+ version: '5'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: mysql2
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ~>
45
+ - - "~>"
52
46
  - !ruby/object:Gem::Version
53
47
  version: 0.3.12
54
48
  type: :runtime
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ~>
52
+ - - "~>"
60
53
  - !ruby/object:Gem::Version
61
54
  version: 0.3.12
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: bundler
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ! '>='
59
+ - - ">="
68
60
  - !ruby/object:Gem::Version
69
61
  version: '0'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ! '>='
66
+ - - ">="
76
67
  - !ruby/object:Gem::Version
77
68
  version: '0'
78
69
  description: Syphon data from an Arel source into ElasticSearch
@@ -82,7 +73,7 @@ executables: []
82
73
  extensions: []
83
74
  extra_rdoc_files: []
84
75
  files:
85
- - .gitignore
76
+ - ".gitignore"
86
77
  - CHANGELOG
87
78
  - Gemfile
88
79
  - LICENSE
@@ -107,33 +98,26 @@ files:
107
98
  - test/test_syphon.rb
108
99
  homepage: https://github.com/howaboutwe/syphon
109
100
  licenses: []
101
+ metadata: {}
110
102
  post_install_message:
111
103
  rdoc_options: []
112
104
  require_paths:
113
105
  - lib
114
106
  required_ruby_version: !ruby/object:Gem::Requirement
115
- none: false
116
107
  requirements:
117
- - - ! '>='
108
+ - - ">="
118
109
  - !ruby/object:Gem::Version
119
110
  version: '0'
120
- segments:
121
- - 0
122
- hash: -2157484304572429612
123
111
  required_rubygems_version: !ruby/object:Gem::Requirement
124
- none: false
125
112
  requirements:
126
- - - ! '>='
113
+ - - ">="
127
114
  - !ruby/object:Gem::Version
128
115
  version: '0'
129
- segments:
130
- - 0
131
- hash: -2157484304572429612
132
116
  requirements: []
133
117
  rubyforge_project:
134
- rubygems_version: 1.8.25
118
+ rubygems_version: 2.2.2
135
119
  signing_key:
136
- specification_version: 3
120
+ specification_version: 4
137
121
  summary: Syphon data from an Arel source into ElasticSearch
138
122
  test_files:
139
123
  - test/config.yml.sample