couch_tap 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +59 -0
- data/README.md +183 -0
- data/Rakefile +14 -0
- data/VERSION +1 -0
- data/bin/couch_tap +13 -0
- data/couch_tap.gemspec +22 -0
- data/examples/feed.rb +27 -0
- data/lib/couch_tap.rb +48 -0
- data/lib/couch_tap/builders/collection.rb +41 -0
- data/lib/couch_tap/builders/table.rb +161 -0
- data/lib/couch_tap/changes.rb +160 -0
- data/lib/couch_tap/destroyers/collection.rb +36 -0
- data/lib/couch_tap/destroyers/table.rb +76 -0
- data/lib/couch_tap/document_handler.rb +73 -0
- data/lib/couch_tap/schema.rb +32 -0
- data/test/functional/functional_changes_test.rb +37 -0
- data/test/test_helper.rb +16 -0
- data/test/unit/builders/collection_test.rb +74 -0
- data/test/unit/builders/table_test.rb +259 -0
- data/test/unit/changes_test.rb +95 -0
- data/test/unit/destroyers/collection_test.rb +55 -0
- data/test/unit/destroyers/table_test.rb +120 -0
- data/test/unit/document_handler_test.rb +80 -0
- data/test/unit/schema_test.rb +52 -0
- metadata +180 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2667fb312598a6e1960dc06f91b4feec38d2419e
|
4
|
+
data.tar.gz: c45097c89984d980fb67d4996c468cabf28f6fae
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 01ebd6988be11f2ee4d0ca1d4cbbb42d10dcbcd5b7457363da2c11167ab9eae8e468a98c70288517e4477f83b2dd89df3db96405ce2eca0f6a3e871b76c0be59
|
7
|
+
data.tar.gz: e4054c2b4447ffc355e3ca893b708bb7b9ea28a0e4b12da68bd23b628a22d3e3eae3d4728a2b2ac2df276069c271f2cb423be58a66df064ccbd75dd77d243549
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
couch_tap (0.0.2)
|
5
|
+
activesupport (>= 3.0.0)
|
6
|
+
couchrest (~> 1.1.3)
|
7
|
+
em-http-request (~> 1.0.3)
|
8
|
+
sequel (>= 3.45.0)
|
9
|
+
yajl-ruby (~> 1.1.0)
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
activesupport (4.0.2)
|
15
|
+
i18n (~> 0.6, >= 0.6.4)
|
16
|
+
minitest (~> 4.2)
|
17
|
+
multi_json (~> 1.3)
|
18
|
+
thread_safe (~> 0.1)
|
19
|
+
tzinfo (~> 0.3.37)
|
20
|
+
addressable (2.3.6)
|
21
|
+
atomic (1.1.14)
|
22
|
+
cookiejar (0.3.2)
|
23
|
+
couchrest (1.1.3)
|
24
|
+
mime-types (~> 1.15)
|
25
|
+
multi_json (~> 1.0)
|
26
|
+
rest-client (~> 1.6.1)
|
27
|
+
em-http-request (1.0.3)
|
28
|
+
addressable (>= 2.2.3)
|
29
|
+
cookiejar
|
30
|
+
em-socksify
|
31
|
+
eventmachine (>= 1.0.0.beta.4)
|
32
|
+
http_parser.rb (>= 0.5.3)
|
33
|
+
em-socksify (0.3.0)
|
34
|
+
eventmachine (>= 1.0.0.beta.4)
|
35
|
+
eventmachine (1.0.3)
|
36
|
+
http_parser.rb (0.6.0)
|
37
|
+
i18n (0.6.9)
|
38
|
+
metaclass (0.0.1)
|
39
|
+
mime-types (1.25.1)
|
40
|
+
minitest (4.7.5)
|
41
|
+
mocha (0.13.3)
|
42
|
+
metaclass (~> 0.0.1)
|
43
|
+
multi_json (1.9.2)
|
44
|
+
rest-client (1.6.7)
|
45
|
+
mime-types (>= 1.16)
|
46
|
+
sequel (4.9.0)
|
47
|
+
sqlite3 (1.3.7)
|
48
|
+
thread_safe (0.1.3)
|
49
|
+
atomic
|
50
|
+
tzinfo (0.3.38)
|
51
|
+
yajl-ruby (1.1.0)
|
52
|
+
|
53
|
+
PLATFORMS
|
54
|
+
ruby
|
55
|
+
|
56
|
+
DEPENDENCIES
|
57
|
+
couch_tap!
|
58
|
+
mocha
|
59
|
+
sqlite3
|
data/README.md
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
# Couch Tap
|
3
|
+
|
4
|
+
Utility to listen to a CouchDB changes feed and automatically insert, update,
|
5
|
+
or delete rows into a relational database from matching key-value conditions of incoming documents.
|
6
|
+
|
7
|
+
While CouchDB is awesome, business people probably won't be
|
8
|
+
quite as impressed when they want to play around with the data. Regular SQL
|
9
|
+
is generally accepted as being easy to use and much more widely supported by a larger
|
10
|
+
range of commercial tools.
|
11
|
+
|
12
|
+
Couch Tap will listen to incoming documents on a CouchDB's changes
|
13
|
+
stream and automatically update rows of RDBMS tables defined in the
|
14
|
+
conversion schema. The changes stream uses a sequence number allowing
|
15
|
+
synchronisation to be started and stopped at will.
|
16
|
+
|
17
|
+
Ruby's fast and simple [sequel](http://sequel.jeremyevans.net/) library is used to provide the connection to the
|
18
|
+
database. This library can also be used for migrations, important for frequently changing schemas.
|
19
|
+
|
20
|
+
Couch Tap takes a simple two-step approach to converting documents to rows. When a change event is received
|
21
|
+
for a matching `document` definition, each associated row is completely deleted. If the change
|
22
|
+
is anything other than a delete event, the rows will be re-created with the new data.
|
23
|
+
This makes things much easier when trying to deal with multi-level documents (i.e. documents of documents)
|
24
|
+
and one-to-many table relationships.
|
25
|
+
|
26
|
+
|
27
|
+
## A Couch Tap Project
|
28
|
+
|
29
|
+
Couch Tap requires a configuration or filter definition that will allow incoming
|
30
|
+
document changes to be identified and dealt with.
|
31
|
+
|
32
|
+
The following example attempts to outline most of the key features of the DSL.
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
# The couchdb database from which to request the changes feed
|
36
|
+
changes "http://user:pass@host:port/invoicing" do
|
37
|
+
|
38
|
+
# Which database should we connect to?
|
39
|
+
database "postgres://user:pass@localhost:5432/invoicing"
|
40
|
+
|
41
|
+
# Simple automated copy, each property's value in the matching CouchDB
|
42
|
+
# document will be copied to the table field with the same name.
|
43
|
+
document 'type' => 'User' do
|
44
|
+
table :users
|
45
|
+
end
|
46
|
+
|
47
|
+
document 'type' => 'Invoice' do
|
48
|
+
|
49
|
+
table :invoices, :key => :invoice_id do
|
50
|
+
|
51
|
+
# Copy columns from fields with different name
|
52
|
+
column :updated_at, :updated_on
|
53
|
+
column :created_at, :created_on
|
54
|
+
|
55
|
+
# Manually set a value from document or fixed variable
|
56
|
+
column :date, doc['date'].to_json
|
57
|
+
column :added_at, Time.now
|
58
|
+
|
59
|
+
# Set column values from a block.
|
60
|
+
column :total do
|
61
|
+
doc['items'].inject(0){ |sum,item| sum + item['total'] }
|
62
|
+
end
|
63
|
+
|
64
|
+
# Collections perform special synchronization in order to deal with
|
65
|
+
# one to many, or indeed many to many relationships.
|
66
|
+
#
|
67
|
+
# Rather than attempting a complex synchronisation process, the current
|
68
|
+
# version of Couch Tap will just DELETE all current entries with a
|
69
|
+
# primary key id that matches that of the parent table.
|
70
|
+
#
|
71
|
+
# The foreign id key is assumed to be the name of the parent
|
72
|
+
# table in singular form with `_id` appended.
|
73
|
+
#
|
74
|
+
# Each item provided in the array will be made available in the
|
75
|
+
# `#data` method, and index from `#index`.
|
76
|
+
# `#document` continues to be the complete source document.
|
77
|
+
#
|
78
|
+
# Collections can be nested to create highly complex structures.
|
79
|
+
#
|
80
|
+
collection :groups do
|
81
|
+
table :invoice_groups do
|
82
|
+
|
83
|
+
collection :entries do
|
84
|
+
table :invoice_entries, :key => :entry_id do
|
85
|
+
column :date, data['date']
|
86
|
+
column :updated_at, document['updated_at']
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# Collections can also be used on Many to Many relationships.
|
94
|
+
collection :label_ids do
|
95
|
+
table :invoice_labels do
|
96
|
+
column :label_id, data
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
103
|
+
end
|
104
|
+
```
|
105
|
+
|
106
|
+
## DSL Summary
|
107
|
+
|
108
|
+
### changes
|
109
|
+
|
110
|
+
Defines which CouchDB database should be used to request the changes feed.
|
111
|
+
|
112
|
+
After loading the rest of the configuration, the service will
|
113
|
+
connect to the database using Event Machine. As new changes come into the
|
114
|
+
system, they will be managed in the background.
|
115
|
+
|
116
|
+
|
117
|
+
### database
|
118
|
+
|
119
|
+
The Sequel URL used to connect to the destination database. Behind the scenes,
|
120
|
+
Couch Tap will check for a table named `couchdb_sequence` that contains a single
|
121
|
+
row for the current changes sequence id, much like a migration id typically
|
122
|
+
seen in a Rails database.
|
123
|
+
|
124
|
+
As changes are received from CouchDB, the current sequence will be updated to
|
125
|
+
match.
|
126
|
+
|
127
|
+
#### document
|
128
|
+
|
129
|
+
When a document is received from the changes feed, it will be passed through each
|
130
|
+
`document` stanza looking for a match. Take the following example:
|
131
|
+
|
132
|
+
document :type => 'Invoice' do |doc|
|
133
|
+
# ...
|
134
|
+
end
|
135
|
+
|
136
|
+
This will match all documents whose `type` property is equal to "Invoice". The
|
137
|
+
document itself will be made available as a hash through the `doc` block variable.
|
138
|
+
|
139
|
+
`document` stanzas may be nested if required to provide further levels of
|
140
|
+
filtering.
|
141
|
+
|
142
|
+
#### table
|
143
|
+
|
144
|
+
Each `table` stanza lets Couch Tap know that all or part of the current document
|
145
|
+
should be inserted into it. By default, the matching table's schema will be read
|
146
|
+
and any field names that match a property in the top-level of the document will
|
147
|
+
be inserted automatically.
|
148
|
+
|
149
|
+
One of the limitations of Couch Tap is that all tables must have an id field as their
|
150
|
+
primary key. In each row, the id's value will be copied from the `_id` of the
|
151
|
+
document being imported. This is the only way that deleted documents can be
|
152
|
+
reliably found and removed from the relational database.
|
153
|
+
|
154
|
+
#### column
|
155
|
+
|
156
|
+
#### collection
|
157
|
+
|
158
|
+
#### foreign_key
|
159
|
+
|
160
|
+
|
161
|
+
### Notes on deleted documents
|
162
|
+
|
163
|
+
Synchronising a deleted document is generally a much more complicated operation.
|
164
|
+
Given that the original document no longer exists in the CouchDB database,
|
165
|
+
there is no way to know which document group and table the document was inserted
|
166
|
+
into.
|
167
|
+
|
168
|
+
To get around this issue, Couch Tap will search through all the tables defined
|
169
|
+
for the database and delete rows that match the primary or foreign keys.
|
170
|
+
|
171
|
+
Obviously, this is very inefficient. Fortunately, CouchDB is not really suited
|
172
|
+
to systems that require lots of document deletion, so hopefully this won't be
|
173
|
+
too much of a problem.
|
174
|
+
|
175
|
+
|
176
|
+
## Testing
|
177
|
+
|
178
|
+
Run tests using rake, or individual tests as follows:
|
179
|
+
|
180
|
+
rake test TEST=test/unit/changes_test.rb
|
181
|
+
|
182
|
+
|
183
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
|
2
|
+
require 'bundler'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rake/testtask'
|
5
|
+
|
6
|
+
Bundler::GemHelper.install_tasks
|
7
|
+
|
8
|
+
Rake::TestTask.new do |t|
|
9
|
+
t.libs << 'test'
|
10
|
+
t.test_files = FileList.new('test/unit/**/*.rb')
|
11
|
+
end
|
12
|
+
|
13
|
+
desc "Run tests"
|
14
|
+
task :default => :test
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.2
|
data/bin/couch_tap
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'couch_tap'
|
5
|
+
|
6
|
+
# Take in the arguments for the configuration file and try to run it
config_path = ARGV[0]

# Fail with a usage message instead of an opaque TypeError/ENOENT when the
# configuration file argument is missing or cannot be read.
if config_path.nil? || !File.readable?(config_path)
  abort "Usage: couch_tap <configuration file>"
end

CouchTap.logger.info "Reading configuration: #{config_path}"

# File.read closes the file handle once the contents are loaded. The path is
# passed as the second argument so errors raised by the configuration are
# reported against the configuration file itself.
CouchTap.module_eval(File.read(config_path), config_path)

# With the configuration loaded, start her up!
CouchTap.start
|
13
|
+
|
data/couch_tap.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Gem specification for couch_tap.
#
# The version and date come from the VERSION file stored beside this gemspec.
# The packaged file lists are taken from `git ls-files`.
Gem::Specification.new do |s|
  # Resolve VERSION relative to this file so the gemspec can be evaluated from
  # any working directory, and read it directly rather than shelling out to
  # `cat` (which is not available on every platform).
  version_file = File.expand_path('../VERSION', __FILE__)

  s.name = "couch_tap"
  s.version = File.read(version_file).strip
  s.date = File.mtime(version_file)
  s.summary = "Listen to a CouchDB changes feed and create rows in a relational database in real-time."
  s.description = "Couch Tap provides a DSL that allows complex CouchDB documents to be converted into rows in a RDBMS' table. The stream of events received from the CouchDB changes feed will trigger documents to be fed into a matching filter block and saved in the database."
  s.authors = ["Sam Lown"]
  s.email = 'me@samlown.com'

  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency "couchrest", "~> 1.1.3"
  s.add_dependency "em-http-request", "~> 1.0.3"
  s.add_dependency "yajl-ruby", "~> 1.1.0"
  s.add_dependency "sequel", ">= 3.45.0"
  s.add_dependency "activesupport", ">= 3.0.0"
  s.add_development_dependency "mocha"
  s.add_development_dependency "sqlite3"
end
|
data/examples/feed.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
# Sample Configuration Script
|
3
|
+
#
|
4
|
+
# Run using the command line application:
|
5
|
+
#
|
6
|
+
# couch_tap feed.rb
|
7
|
+
#
|
8
|
+
|
9
|
+
|
10
|
+
changes "http://user:pass@host:port/invoicing" do

  # Which database should we connect to?
  database "sqlite:///database.sqlite3"

  # Documents are matched with `document` stanzas, as described in the README.
  # Each matching document is copied into the named table.
  document 'type' => 'User' do
    table :users
  end

  document 'type' => 'Journey' do
    table :journeys
  end

end
|
25
|
+
|
26
|
+
|
27
|
+
|
data/lib/couch_tap.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
|
2
|
+
# Low level requirements
|
3
|
+
require 'sequel'
|
4
|
+
require 'couchrest'
|
5
|
+
require 'em-http'
|
6
|
+
require 'yajl'
|
7
|
+
require 'logger'
|
8
|
+
require 'active_support/inflector'
|
9
|
+
require 'active_support/core_ext/object/blank'
|
10
|
+
|
11
|
+
# Our stuff
|
12
|
+
require 'couch_tap/changes'
|
13
|
+
require 'couch_tap/schema'
|
14
|
+
require 'couch_tap/document_handler'
|
15
|
+
require 'couch_tap/builders/collection'
|
16
|
+
require 'couch_tap/builders/table'
|
17
|
+
require 'couch_tap/destroyers/collection'
|
18
|
+
require 'couch_tap/destroyers/table'
|
19
|
+
|
20
|
+
|
21
|
+
# Top-level namespace and entry point for Couch Tap. The module extends
# itself, so every method below can be called as `CouchTap.<method>`.
module CouchTap
  extend self

  # Register a changes feed.
  #
  # database - handed straight to Changes.new.
  # block    - configuration block, also handed to Changes.new.
  #
  # Returns the list of every feed registered so far.
  def changes(database, &block)
    (@changes ||= []) << Changes.new(database, &block)
  end

  # Start every registered feed inside an EventMachine.run block.
  #
  # When no feed has been registered there is nothing to run, so a warning is
  # logged and the method returns instead of failing on a nil feed list.
  def start
    feeds = @changes || []
    if feeds.empty?
      logger.warn "No changes feeds have been defined, nothing to start"
      return
    end

    EventMachine.run do
      feeds.each do |feed|
        feed.start
      end
    end
  end

  # Provide some way to handle messages. The logger is built on first use and
  # then reused.
  def logger
    @logger ||= prepare_logger
  end

  # Build the default logger: writes to STDOUT at INFO level.
  def prepare_logger
    log = Logger.new(STDOUT)
    log.level = Logger::INFO
    log
  end

end
|
48
|
+
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
module CouchTap

  module Builders

    #
    # Collection Builder. Go through each sub-table definition and recursively
    # prepare the data ready to be inserted into the database.
    #
    class Collection

      attr_reader :parent, :field

      # parent - object whose `data` is read to find the collection's items.
      # field  - name of the entry in `parent.data` holding the items.
      # opts   - accepted but not used by this class.
      # block  - DSL block evaluated against this instance; typically calls
      #          `table`. Optional: without it no tables are defined.
      def initialize(parent, field, opts = {}, &block)
        @_tables = []
        @parent = parent
        @field = field

        # instance_eval raises ArgumentError when handed no block at all.
        instance_eval(&block) if block
      end

      # Execute every table built for this collection, in definition order.
      def execute
        @_tables.each do |table|
          table.execute
        end
      end

      #### DSL Methods

      # Define a table to be filled from the collection. One Table is built
      # for each item found under `field` in the parent's data, with the item
      # passed along as the `:data` option. A missing field yields no tables.
      def table(name, opts = {}, &block)
        source = parent.data[field.to_s] || []
        source.each do |item|
          options = opts.merge(:data => item)
          @_tables << Table.new(parent, name, options, &block)
        end
      end

    end
  end
end
|
41
|
+
|