couch_tap 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +59 -0
- data/README.md +183 -0
- data/Rakefile +14 -0
- data/VERSION +1 -0
- data/bin/couch_tap +13 -0
- data/couch_tap.gemspec +22 -0
- data/examples/feed.rb +27 -0
- data/lib/couch_tap.rb +48 -0
- data/lib/couch_tap/builders/collection.rb +41 -0
- data/lib/couch_tap/builders/table.rb +161 -0
- data/lib/couch_tap/changes.rb +160 -0
- data/lib/couch_tap/destroyers/collection.rb +36 -0
- data/lib/couch_tap/destroyers/table.rb +76 -0
- data/lib/couch_tap/document_handler.rb +73 -0
- data/lib/couch_tap/schema.rb +32 -0
- data/test/functional/functional_changes_test.rb +37 -0
- data/test/test_helper.rb +16 -0
- data/test/unit/builders/collection_test.rb +74 -0
- data/test/unit/builders/table_test.rb +259 -0
- data/test/unit/changes_test.rb +95 -0
- data/test/unit/destroyers/collection_test.rb +55 -0
- data/test/unit/destroyers/table_test.rb +120 -0
- data/test/unit/document_handler_test.rb +80 -0
- data/test/unit/schema_test.rb +52 -0
- metadata +180 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2667fb312598a6e1960dc06f91b4feec38d2419e
|
4
|
+
data.tar.gz: c45097c89984d980fb67d4996c468cabf28f6fae
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 01ebd6988be11f2ee4d0ca1d4cbbb42d10dcbcd5b7457363da2c11167ab9eae8e468a98c70288517e4477f83b2dd89df3db96405ce2eca0f6a3e871b76c0be59
|
7
|
+
data.tar.gz: e4054c2b4447ffc355e3ca893b708bb7b9ea28a0e4b12da68bd23b628a22d3e3eae3d4728a2b2ac2df276069c271f2cb423be58a66df064ccbd75dd77d243549
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
couch_tap (0.0.2)
|
5
|
+
activesupport (>= 3.0.0)
|
6
|
+
couchrest (~> 1.1.3)
|
7
|
+
em-http-request (~> 1.0.3)
|
8
|
+
sequel (>= 3.45.0)
|
9
|
+
yajl-ruby (~> 1.1.0)
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
activesupport (4.0.2)
|
15
|
+
i18n (~> 0.6, >= 0.6.4)
|
16
|
+
minitest (~> 4.2)
|
17
|
+
multi_json (~> 1.3)
|
18
|
+
thread_safe (~> 0.1)
|
19
|
+
tzinfo (~> 0.3.37)
|
20
|
+
addressable (2.3.6)
|
21
|
+
atomic (1.1.14)
|
22
|
+
cookiejar (0.3.2)
|
23
|
+
couchrest (1.1.3)
|
24
|
+
mime-types (~> 1.15)
|
25
|
+
multi_json (~> 1.0)
|
26
|
+
rest-client (~> 1.6.1)
|
27
|
+
em-http-request (1.0.3)
|
28
|
+
addressable (>= 2.2.3)
|
29
|
+
cookiejar
|
30
|
+
em-socksify
|
31
|
+
eventmachine (>= 1.0.0.beta.4)
|
32
|
+
http_parser.rb (>= 0.5.3)
|
33
|
+
em-socksify (0.3.0)
|
34
|
+
eventmachine (>= 1.0.0.beta.4)
|
35
|
+
eventmachine (1.0.3)
|
36
|
+
http_parser.rb (0.6.0)
|
37
|
+
i18n (0.6.9)
|
38
|
+
metaclass (0.0.1)
|
39
|
+
mime-types (1.25.1)
|
40
|
+
minitest (4.7.5)
|
41
|
+
mocha (0.13.3)
|
42
|
+
metaclass (~> 0.0.1)
|
43
|
+
multi_json (1.9.2)
|
44
|
+
rest-client (1.6.7)
|
45
|
+
mime-types (>= 1.16)
|
46
|
+
sequel (4.9.0)
|
47
|
+
sqlite3 (1.3.7)
|
48
|
+
thread_safe (0.1.3)
|
49
|
+
atomic
|
50
|
+
tzinfo (0.3.38)
|
51
|
+
yajl-ruby (1.1.0)
|
52
|
+
|
53
|
+
PLATFORMS
|
54
|
+
ruby
|
55
|
+
|
56
|
+
DEPENDENCIES
|
57
|
+
couch_tap!
|
58
|
+
mocha
|
59
|
+
sqlite3
|
data/README.md
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
# Couch Tap
|
3
|
+
|
4
|
+
Utility to listen to a CouchDB changes feed and automatically insert, update,
|
5
|
+
or delete rows into a relational database from matching key-value conditions of incoming documents.
|
6
|
+
|
7
|
+
While CouchDB is awesome, business people probably won't be
|
8
|
+
quite as impressed when they want to play around with the data. Regular SQL
|
9
|
+
is generally accepted as being easy to use and much more widely supported by a larger
|
10
|
+
range of commercial tools.
|
11
|
+
|
12
|
+
Couch Tap will listen to incoming documents on a CouchDB's changes
|
13
|
+
stream and automatically update rows of RDBMS tables defined in the
|
14
|
+
conversion schema. The changes stream uses a sequence number allowing
|
15
|
+
synchronisation to be started and stopped at will.
|
16
|
+
|
17
|
+
Ruby's fast and simple [sequel](http://sequel.jeremyevans.net/) library is used to provide the connection to the
|
18
|
+
database. This library can also be used for migrations, important for frequently changing schemas.
|
19
|
+
|
20
|
+
Couch Tap takes a simple two-step approach to converting documents to rows. When a change event is received
|
21
|
+
for a matching `document` definition, each associated row is completely deleted. If the change
|
22
|
+
is anything other than a delete event, the rows will be re-created with the new data.
|
23
|
+
This makes things much easier when trying to deal with multi-level documents (i.e. documents of documents)
|
24
|
+
and one-to-many table relationships.
|
25
|
+
|
26
|
+
|
27
|
+
## A Couch Tap Project
|
28
|
+
|
29
|
+
Couch Tap requires a configuration or filter definition that will allow incoming
|
30
|
+
document changes to be identified and dealt with.
|
31
|
+
|
32
|
+
The following example attempts to outline most of the key features of the DSL.
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
# The couchdb database from which to request the changes feed
|
36
|
+
changes "http://user:pass@host:port/invoicing" do
|
37
|
+
|
38
|
+
# Which database should we connect to?
|
39
|
+
database "postgres://user:pass@localhost:5432/invoicing"
|
40
|
+
|
41
|
+
# Simple automated copy, each property's value in the matching CouchDB
|
42
|
+
# document will be copied to the table field with the same name.
|
43
|
+
document 'type' => 'User' do
|
44
|
+
table :users
|
45
|
+
end
|
46
|
+
|
47
|
+
document 'type' => 'Invoice' do
|
48
|
+
|
49
|
+
table :invoices, :key => :invoice_id do
|
50
|
+
|
51
|
+
# Copy columns from fields with different name
|
52
|
+
column :updated_at, :updated_on
|
53
|
+
column :created_at, :created_on
|
54
|
+
|
55
|
+
# Manually set a value from document or fixed variable
|
56
|
+
column :date, doc['date'].to_json
|
57
|
+
column :added_at, Time.now
|
58
|
+
|
59
|
+
# Set column values from a block.
|
60
|
+
column :total do
|
61
|
+
doc['items'].inject(0){ |sum,item| sum + item['total'] }
|
62
|
+
end
|
63
|
+
|
64
|
+
# Collections perform special synchronization in order to deal with
|
65
|
+
# one to many, or indeed many to many relationships.
|
66
|
+
#
|
67
|
+
# Rather than attempting a complex synchronisation process, the current
|
68
|
+
# version of Couch Tap will just DELETE all current entries with a
|
69
|
+
# primary key id that matches that of the parent table.
|
70
|
+
#
|
71
|
+
# The foreign id key is assumed to be the name of the parent
|
72
|
+
# table in singular form with `_id` appended.
|
73
|
+
#
|
74
|
+
# Each item provided in the array will be made available in the
|
75
|
+
# `#data` method, and index from `#index`.
|
76
|
+
# `#document` continues to be the complete source document.
|
77
|
+
#
|
78
|
+
# Collections can be nested to create highly complex structures.
|
79
|
+
#
|
80
|
+
collection :groups do
|
81
|
+
table :invoice_groups do
|
82
|
+
|
83
|
+
collection :entries do
|
84
|
+
table :invoice_entries, :key => :entry_id do
|
85
|
+
column :date, data['date']
|
86
|
+
column :updated_at, document['updated_at']
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# Collections can also be used on Many to Many relationships.
|
94
|
+
collection :label_ids do
|
95
|
+
table :invoice_labels do
|
96
|
+
column :label_id, data
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
103
|
+
end
|
104
|
+
```
|
105
|
+
|
106
|
+
## DSL Summary
|
107
|
+
|
108
|
+
### changes
|
109
|
+
|
110
|
+
Defines which CouchDB database should be used to request the changes feed.
|
111
|
+
|
112
|
+
After loading the rest of the configuration, the service will
|
113
|
+
connect to the database using Event Machine. As new changes come into the
|
114
|
+
system, they will be managed in the background.
|
115
|
+
|
116
|
+
|
117
|
+
### database
|
118
|
+
|
119
|
+
The Sequel URL used to connect to the destination database. Behind the scenes,
|
120
|
+
Couch Tap will check for a table named `couchdb_sequence` that contains a single
|
121
|
+
row for the current changes sequence id, much like a migration id typically
|
122
|
+
seen in a Rails database.
|
123
|
+
|
124
|
+
As changes are received from CouchDB, the current sequence will be updated to
|
125
|
+
match.
|
126
|
+
|
127
|
+
#### document
|
128
|
+
|
129
|
+
When a document is received from the changes feed, it will be passed through each
|
130
|
+
`document` stanza looking for a match. Take the following example:
|
131
|
+
|
132
|
+
document :type => 'Invoice' do |doc|
|
133
|
+
# ...
|
134
|
+
end
|
135
|
+
|
136
|
+
This will match all documents whose `type` property is equal to "Invoice". The
|
137
|
+
document itself will be made available as a hash through the `doc` block variable.
|
138
|
+
|
139
|
+
`document` stanzas may be nested if required to provide further levels of
|
140
|
+
filtering.
|
141
|
+
|
142
|
+
#### table
|
143
|
+
|
144
|
+
Each `table` stanza lets Couch Tap know that all or part of the current document
|
145
|
+
should be inserted into it. By default, the matching table's schema will be read
|
146
|
+
and any field names that match a property in the top-level of the document will
|
147
|
+
be inserted automatically.
|
148
|
+
|
149
|
+
One of the limitations of Couch Tap is that all tables must have an id field as their
|
150
|
+
primary key. In each row, the id's value will be copied from the `_id` of the
|
151
|
+
document being imported. This is the only way that deleted documents can be
|
152
|
+
reliably found and removed from the relational database.
|
153
|
+
|
154
|
+
#### column
|
155
|
+
|
156
|
+
#### collection
|
157
|
+
|
158
|
+
#### foreign_key
|
159
|
+
|
160
|
+
|
161
|
+
### Notes on deleted documents
|
162
|
+
|
163
|
+
Synchronising a deleted document is generally a much more complicated operation.
|
164
|
+
Given that the original document no longer exists in the CouchDB database,
|
165
|
+
there is no way to know which document group and table the document was inserted
|
166
|
+
into.
|
167
|
+
|
168
|
+
To get around this issue, Couch Tap will search through all the tables defined
|
169
|
+
for the database and delete rows that match the primary or foreign keys.
|
170
|
+
|
171
|
+
Obviously, this is very inefficient. Fortunately, CouchDB is not really suited
|
172
|
+
to systems that require lots of document deletion, so hopefully this won't be
|
173
|
+
too much of a problem.
|
174
|
+
|
175
|
+
|
176
|
+
## Testing
|
177
|
+
|
178
|
+
Run tests using rake, or individual tests as follows:
|
179
|
+
|
180
|
+
rake test TEST=test/unit/changes_test.rb
|
181
|
+
|
182
|
+
|
183
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
|
2
|
+
# Rake tasks for the couch_tap gem.
#
# Bundler's gem helper supplies the gem packaging tasks; the test task below
# only picks up files under test/unit.
require 'bundler'
require 'rubygems'
require 'rake/testtask'

Bundler::GemHelper.install_tasks

# `rake test` loads every Ruby file under test/unit with `test` on the load path.
Rake::TestTask.new do |test_task|
  test_task.libs.push('test')
  test_task.test_files = FileList['test/unit/**/*.rb']
end

# Running plain `rake` is the same as running `rake test`.
desc "Run tests"
task :default => [:test]
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.2
|
data/bin/couch_tap
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line entry point: evaluates the configuration file given as the
# first argument inside the CouchTap module and then starts the service.
#
# Usage: couch_tap <configuration file>

require 'rubygems'
require 'couch_tap'

# Take in the arguments for the configuration file and try to run it
config_path = ARGV[0]

# Fail with a readable message instead of a TypeError from File when the
# configuration file argument is missing.
abort "Usage: couch_tap <configuration file>" if config_path.nil?

CouchTap.logger.info "Reading configuration: #{config_path}"

# File.read closes the file handle once the contents are loaded. Passing the
# path as second argument makes errors raised by the configuration report the
# configuration file name in their backtrace.
CouchTap.module_eval(File.read(config_path), config_path)

# With the configuration loaded, start her up!
CouchTap.start
|
13
|
+
|
data/couch_tap.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Gem specification for couch_tap.
#
# The version number and release date are taken from the VERSION file that
# sits next to this gemspec. The path is resolved relative to this file so
# the specification can be evaluated from any working directory, and the file
# is read with Ruby rather than by shelling out to `cat`.
Gem::Specification.new do |s|
  version_file = File.expand_path('../VERSION', __FILE__)

  s.name        = "couch_tap"
  s.version     = File.read(version_file).strip
  s.date        = File.mtime(version_file)
  s.summary     = "Listen to a CouchDB changes feed and create rows in a relational database in real-time."
  s.description = "Couch Tap provides a DSL that allows complex CouchDB documents to be converted into rows in a RDBMS' table. The stream of events received from the CouchDB changes feed will trigger documents to be fed into a matching filter block and saved in the database."
  s.authors     = ["Sam Lown"]
  s.email       = 'me@samlown.com'

  # File lists are taken from git, so only tracked files are packaged.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency "couchrest", "~> 1.1.3"
  s.add_dependency "em-http-request", "~> 1.0.3"
  s.add_dependency "yajl-ruby", "~> 1.1.0"
  s.add_dependency "sequel", ">= 3.45.0"
  s.add_dependency "activesupport", ">= 3.0.0"

  # Only needed to run the test suite
  s.add_development_dependency "mocha"
  s.add_development_dependency "sqlite3"
end
|
data/examples/feed.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
# Sample Configuration Script
#
# Run using the command line application:
#
#   couch_tap feed.rb
#
# The stanzas used here follow the DSL described in the README: `changes`
# names the CouchDB database to listen to, `database` the destination, and
# each `document` block matches incoming documents by their properties.
#

changes "http://user:pass@host:port/invoicing" do

  # Which database should we connect to?
  database "sqlite:///database.sqlite3"

  # Documents whose `type` is "User" are copied into the users table.
  document 'type' => 'User' do
    table :users
  end

  # Documents whose `type` is "Journey" are copied into the journeys table.
  document 'type' => 'Journey' do
    table :journeys
  end

end
|
25
|
+
|
26
|
+
|
27
|
+
|
data/lib/couch_tap.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
|
2
|
+
# Low level requirements
|
3
|
+
require 'sequel'
|
4
|
+
require 'couchrest'
|
5
|
+
require 'em-http'
|
6
|
+
require 'yajl'
|
7
|
+
require 'logger'
|
8
|
+
require 'active_support/inflector'
|
9
|
+
require 'active_support/core_ext/object/blank'
|
10
|
+
|
11
|
+
# Our stuff
|
12
|
+
require 'couch_tap/changes'
|
13
|
+
require 'couch_tap/schema'
|
14
|
+
require 'couch_tap/document_handler'
|
15
|
+
require 'couch_tap/builders/collection'
|
16
|
+
require 'couch_tap/builders/table'
|
17
|
+
require 'couch_tap/destroyers/collection'
|
18
|
+
require 'couch_tap/destroyers/table'
|
19
|
+
|
20
|
+
|
21
|
+
# Top level namespace of the library. The module extends itself, so every
# method below is callable directly as `CouchTap.<method>`; the command line
# script evaluates the configuration file in this module's context.
module CouchTap
  extend self

  # Register a new changes feed definition.
  #
  # database - handed to Changes.new unchanged.
  # block    - configuration block, handed to Changes.new unchanged.
  #
  # Returns the Array holding every feed registered so far.
  def changes(database, &block)
    (@changes ||= []) << Changes.new(database, &block)
  end

  # Start every registered changes feed inside an EventMachine reactor.
  #
  # When no feed has been registered there is nothing to run: a warning is
  # logged and the method returns nil without starting the reactor, instead
  # of failing with a NoMethodError on nil.
  def start
    feeds = @changes || []
    if feeds.empty?
      logger.warn "No changes feeds configured, nothing to start"
      return
    end

    EventMachine.run do
      feeds.each do |feed|
        feed.start
      end
    end
  end

  # Provide some way to handle messages.
  #
  # Returns the shared Logger, built on first use.
  def logger
    @logger ||= prepare_logger
  end

  # Build the default logger: writes to STDOUT at INFO level.
  def prepare_logger
    log = Logger.new(STDOUT)
    log.level = Logger::INFO
    log
  end

end
|
48
|
+
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
module CouchTap

  module Builders

    #
    # Collection Builder. Go through each sub-table definition and recursively
    # prepare the data ready to be inserted into the database.
    #
    # The definition block is evaluated in the context of the new instance as
    # soon as it is created, so every `table` call inside it immediately
    # builds one Table per item found in the parent's data.
    #
    class Collection

      attr_reader :parent, :field

      # parent - object whose `data` hash holds the collection to iterate.
      # field  - key of the collection inside `parent.data`; it is converted
      #          to a String before the lookup.
      # opts   - accepted for signature compatibility, currently unused.
      # block  - DSL block made of `table` calls. It is optional: without a
      #          block the collection simply contains no tables.
      def initialize(parent, field, opts = {}, &block)
        @_tables = []
        @parent  = parent
        @field   = field

        # instance_eval raises an ArgumentError when called without a block
        # or string, so only evaluate when a definition was provided.
        instance_eval(&block) if block
      end

      # Run `execute` on each table built from the definition block, in the
      # order they were defined.
      def execute
        @_tables.each do |table|
          table.execute
        end
      end

      #### DSL Methods

      # Define a table to be filled from the collection: one Table is built
      # per item, each receiving the item through the `:data` option merged
      # into `opts`. A field that is missing from the parent's data (or nil)
      # yields no tables.
      def table(name, opts = {}, &block)
        source = parent.data[field.to_s] || []
        source.each do |item|
          options = opts.merge(:data => item)
          @_tables << Table.new(parent, name, options, &block)
        end
      end

    end
  end
end
|
41
|
+
|