couch_tap 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2667fb312598a6e1960dc06f91b4feec38d2419e
4
+ data.tar.gz: c45097c89984d980fb67d4996c468cabf28f6fae
5
+ SHA512:
6
+ metadata.gz: 01ebd6988be11f2ee4d0ca1d4cbbb42d10dcbcd5b7457363da2c11167ab9eae8e468a98c70288517e4477f83b2dd89df3db96405ce2eca0f6a3e871b76c0be59
7
+ data.tar.gz: e4054c2b4447ffc355e3ca893b708bb7b9ea28a0e4b12da68bd23b628a22d3e3eae3d4728a2b2ac2df276069c271f2cb423be58a66df064ccbd75dd77d243549
@@ -0,0 +1,13 @@
1
+ .bundle
2
+ .sass-cache/
3
+ *.sw[opn]
4
+ .rvmrc
5
+ .DS_Store
6
+ *.zip
7
+ *.*~
8
+ .rsync_cache
9
+ zeus.json
10
+ output
11
+ tmp
12
+ crash.log
13
+ pkg
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+
2
+ source 'https://rubygems.org'
3
+ gemspec
4
+
@@ -0,0 +1,59 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ couch_tap (0.0.2)
5
+ activesupport (>= 3.0.0)
6
+ couchrest (~> 1.1.3)
7
+ em-http-request (~> 1.0.3)
8
+ sequel (>= 3.45.0)
9
+ yajl-ruby (~> 1.1.0)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ activesupport (4.0.2)
15
+ i18n (~> 0.6, >= 0.6.4)
16
+ minitest (~> 4.2)
17
+ multi_json (~> 1.3)
18
+ thread_safe (~> 0.1)
19
+ tzinfo (~> 0.3.37)
20
+ addressable (2.3.6)
21
+ atomic (1.1.14)
22
+ cookiejar (0.3.2)
23
+ couchrest (1.1.3)
24
+ mime-types (~> 1.15)
25
+ multi_json (~> 1.0)
26
+ rest-client (~> 1.6.1)
27
+ em-http-request (1.0.3)
28
+ addressable (>= 2.2.3)
29
+ cookiejar
30
+ em-socksify
31
+ eventmachine (>= 1.0.0.beta.4)
32
+ http_parser.rb (>= 0.5.3)
33
+ em-socksify (0.3.0)
34
+ eventmachine (>= 1.0.0.beta.4)
35
+ eventmachine (1.0.3)
36
+ http_parser.rb (0.6.0)
37
+ i18n (0.6.9)
38
+ metaclass (0.0.1)
39
+ mime-types (1.25.1)
40
+ minitest (4.7.5)
41
+ mocha (0.13.3)
42
+ metaclass (~> 0.0.1)
43
+ multi_json (1.9.2)
44
+ rest-client (1.6.7)
45
+ mime-types (>= 1.16)
46
+ sequel (4.9.0)
47
+ sqlite3 (1.3.7)
48
+ thread_safe (0.1.3)
49
+ atomic
50
+ tzinfo (0.3.38)
51
+ yajl-ruby (1.1.0)
52
+
53
+ PLATFORMS
54
+ ruby
55
+
56
+ DEPENDENCIES
57
+ couch_tap!
58
+ mocha
59
+ sqlite3
@@ -0,0 +1,183 @@
1
+
2
+ # Couch Tap
3
+
4
+ Utility to listen to a CouchDB changes feed and automatically insert, update,
5
+ or delete rows into a relational database from matching key-value conditions of incoming documents.
6
+
7
+ While CouchDB is awesome, business people probably won't be
8
+ quite as impressed when they want to play around with the data. Regular SQL
9
+ is generally accepted as being easy to use and much more widely supported by a larger
10
+ range of commercial tools.
11
+
12
+ Couch Tap will listen to incoming documents on a CouchDB's changes
13
+ stream and automatically update rows of RDBMS tables defined in the
14
+ conversion schema. The changes stream uses a sequence number allowing
15
+ synchronisation to be started and stopped at will.
16
+
17
+ Ruby's fast and simple [sequel](http://sequel.jeremyevans.net/) library is used to provide the connection to the
18
+ database. This library can also be used for migrations, important for frequently changing schemas.
19
+
20
+ Couch Tap takes a simple two-step approach to converting documents into rows. When a change event is received
21
+ for a matching `document` definition, each associated row is completely deleted. If the change
22
+ is anything other than a delete event, the rows will be re-created with the new data.
23
+ This makes things much easier when trying to deal with multi-level documents (i.e. documents of documents)
24
+ and one-to-many table relationships.
25
+
26
+
27
+ ## A Couch Tap Project
28
+
29
+ Couch Tap requires a configuration or filter definition that will allow incoming
30
+ document changes to be identified and dealt with.
31
+
32
+ The following example attempts to outline most of the key features of the DSL.
33
+
34
+ ```ruby
35
+ # The couchdb database from which to request the changes feed
36
+ changes "http://user:pass@host:port/invoicing" do
37
+
38
+ # Which database should we connect to?
39
+ database "postgres://user:pass@localhost:5432/invoicing"
40
+
41
+ # Simple automated copy, each property's value in the matching CouchDB
42
+ # document will be copied to the table field with the same name.
43
+ document 'type' => 'User' do
44
+ table :users
45
+ end
46
+
47
+ document 'type' => 'Invoice' do
48
+
49
+ table :invoices, :key => :invoice_id do
50
+
51
+ # Copy columns from fields with different name
52
+ column :updated_at, :updated_on
53
+ column :created_at, :created_on
54
+
55
+ # Manually set a value from document or fixed variable
56
+ column :date, doc['date'].to_json
57
+ column :added_at, Time.now
58
+
59
+ # Set column values from a block.
60
+ column :total do
61
+ doc['items'].inject(0){ |sum,item| sum + item['total'] }
62
+ end
63
+
64
+ # Collections perform special synchronization in order to deal with
65
+ # one to many, or indeed many to many relationships.
66
+ #
67
+ # Rather than attempting a complex synchronisation process, the current
68
+ # version of Couch Tap will just DELETE all current entries with a
69
+ # primary key id that matches that of the parent table.
70
+ #
71
+ # The foreign id key is assumed to be the name of the parent
72
+ # table in singular form with `_id` appended.
73
+ #
74
+ # Each item provided in the array will be made available in the
75
+ # `#data` method, and index from `#index`.
76
+ # `#document` continues to be the complete source document.
77
+ #
78
+ # Collections can be nested to create highly complex structures.
79
+ #
80
+ collection :groups do
81
+ table :invoice_groups do
82
+
83
+ collection :entries do
84
+ table :invoice_entries, :key => :entry_id do
85
+ column :date, data['date']
86
+ column :updated_at, document['updated_at']
87
+ end
88
+ end
89
+
90
+ end
91
+ end
92
+
93
+ # Collections can also be used on Many to Many relationships.
94
+ collection :label_ids do
95
+ table :invoice_labels do
96
+ column :label_id, data
97
+ end
98
+ end
99
+
100
+ end
101
+
102
+ end
103
+ end
104
+ ```
105
+
106
+ ## DSL Summary
107
+
108
+ ### changes
109
+
110
+ Defines which CouchDB database should be used to request the changes feed.
111
+
112
+ After loading the rest of the configuration, the service will
113
+ connect to the database using Event Machine. As new changes come into the
114
+ system, they will be managed in the background.
115
+
116
+
117
+ ### connection
118
+
119
+ The Sequel URL used to connect to the destination database. Behind the scenes,
120
+ Couch Tap will check for a table named `couchdb_sequence` that contains a single
121
+ row for the current changes sequence id, much like a migration id typically
122
+ seen in a Rails database.
123
+
124
+ As changes are received from CouchDB, the current sequence will be updated to
125
+ match.
126
+
127
+ #### document
128
+
129
+ When a document is received from the changes feed, it will be passed through each
130
+ `document` stanza looking for a match. Take the following example:
131
+
132
+ document :type => 'Invoice' do |doc|
133
+ # ...
134
+ end
135
+
136
+ This will match all documents whose `type` property is equal to "Invoice". The
137
+ document itself will be made available as a hash through the `doc` block variable.
138
+
139
+ `document` stanzas may be nested if required to provide further levels of
140
+ filtering.
141
+
142
+ #### table
143
+
144
+ Each `table` stanza lets Couch Tap know that all or part of the current document
145
+ should be inserted into it. By default, the matching table's schema will be read
146
+ and any field names that match a property in the top-level of the document will
147
+ be inserted automatically.
148
+
149
+ One of the limitations of Couch Tap is that all tables must have an id field as their
150
+ primary key. In each row, the id's value will be copied from the `_id` of the
151
+ document being imported. This is the only way that deleted documents can be
152
+ reliably found and removed from the relational database.
153
+
154
+ #### column
155
+
156
+ #### collection
157
+
158
+ #### foreign_key
159
+
160
+
161
+ ### Notes on deleted documents
162
+
163
+ Synchronising a deleted document is generally a much more complicated operation.
164
+ Given that the original document no longer exists in the CouchDB database,
165
+ there is no way to know which document group and table the document was inserted
166
+ into.
167
+
168
+ To get around this issue, Couch Tap will search through all the tables defined
169
+ for the database and delete rows that match the primary or foreign keys.
170
+
171
+ Obviously, this is very inefficient. Fortunately, CouchDB is not really suited
172
+ to systems that require lots of document deletion, so hopefully this won't be
173
+ too much of a problem.
174
+
175
+
176
+ ## Testing
177
+
178
+ Run tests using rake, or individual tests as follows:
179
+
180
+ rake test TEST=test/unit/changes_test.rb
181
+
182
+
183
+
@@ -0,0 +1,14 @@
1
+
2
+ require 'bundler'
3
+ require 'rubygems'
4
+ require 'rake/testtask'
5
+
6
# Pull in the standard gem packaging tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# Test task: adds the `test` directory to the load path and runs every
# Ruby file found below test/unit.
Rake::TestTask.new do |test_task|
  test_task.libs << 'test'
  test_task.test_files = FileList.new('test/unit/**/*.rb')
end

# Running plain `rake` executes the test task.
desc "Run tests"
task :default => :test
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.2
@@ -0,0 +1,13 @@
1
#!/usr/bin/env ruby

# Command line entry point for Couch Tap.
#
# Usage:
#
#   couch_tap <config_file>
#
# The configuration file is a Ruby script; its contents are evaluated in the
# context of the CouchTap module, after which CouchTap.start is called.

require 'rubygems'
require 'couch_tap'

config_path = ARGV[0]

# Without a path there is nothing to evaluate; exit with a usage hint rather
# than failing later with an unrelated TypeError.
abort "Usage: couch_tap <config_file>" if config_path.nil?

# Take in the arguments for the configuration file and try to run it
CouchTap.logger.info "Reading configuration: #{config_path}"

# File.read closes the file handle as soon as the contents are loaded.
# Passing the path as the second argument makes errors raised by the
# configuration report the real file name instead of "(eval)".
CouchTap.module_eval(File.read(config_path), config_path)

# With the configuration loaded, start her up!
CouchTap.start
13
+
@@ -0,0 +1,22 @@
1
# Gem specification for couch_tap.
#
# The version number and release date are both taken from the VERSION file,
# and the file lists are obtained from git, so the specification is expected
# to be evaluated from the root of the project's git checkout.
Gem::Specification.new do |s|
  s.name          = "couch_tap"
  # Read the file directly instead of shelling out to `cat`, which is not
  # available on every platform and needlessly spawns a subshell.
  s.version       = File.read('VERSION').strip
  s.date          = File.mtime('VERSION')
  s.summary       = "Listen to a CouchDB changes feed and create rows in a relational database in real-time."
  s.description   = "Couch Tap provides a DSL that allows complex CouchDB documents to be converted into rows in a RDBMS' table. The stream of events received from the CouchDB changes feed will trigger documents to be fed into a matching filter block and saved in the database."
  s.authors       = ["Sam Lown"]
  s.email         = 'me@samlown.com'

  # Everything tracked by git is packaged; executables are the files in bin/.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency "couchrest", "~> 1.1.3"
  s.add_dependency "em-http-request", "~> 1.0.3"
  s.add_dependency "yajl-ruby", "~> 1.1.0"
  s.add_dependency "sequel", ">= 3.45.0"
  s.add_dependency "activesupport", ">= 3.0.0"

  # Development-only dependencies
  s.add_development_dependency "mocha"
  s.add_development_dependency "sqlite3"
end
@@ -0,0 +1,27 @@
1
+
2
+ # Sample Configuration Script
3
+ #
4
+ # Run using the command line application:
5
+ #
6
+ # couch_tap feed.rb
7
+ #
8
+
9
+
10
+ changes "http://user:pass@host:port/invoicing" do
11
+
12
+ # Which database should we connect to?
13
+ database "sqlite:///database.sqlite3"
14
+
15
+ filter 'type' => 'User' do
16
+ table :users
17
+ end
18
+
19
+ filter 'type' => 'Journey' do
20
+ table :journeys
21
+ end
22
+
23
+
24
+ end
25
+
26
+
27
+
@@ -0,0 +1,48 @@
1
+
2
+ # Low level requirements
3
+ require 'sequel'
4
+ require 'couchrest'
5
+ require 'em-http'
6
+ require 'yajl'
7
+ require 'logger'
8
+ require 'active_support/inflector'
9
+ require 'active_support/core_ext/object/blank'
10
+
11
+ # Our stuff
12
+ require 'couch_tap/changes'
13
+ require 'couch_tap/schema'
14
+ require 'couch_tap/document_handler'
15
+ require 'couch_tap/builders/collection'
16
+ require 'couch_tap/builders/table'
17
+ require 'couch_tap/destroyers/collection'
18
+ require 'couch_tap/destroyers/table'
19
+
20
+
21
#
# Top-level namespace and entry point of Couch Tap.
#
# The module extends itself, so every method below can be called directly
# on the module (e.g. `CouchTap.start`).
#
module CouchTap
  extend self

  # Register a changes feed. A new Changes object is created from the
  # given database and block and appended to the list of feeds that
  # will be started by #start.
  #
  # Returns the array of all feeds registered so far.
  def changes(database, &block)
    (@changes ||= []) << Changes.new(database, &block)
  end

  # Run the EventMachine reactor and call #start on each registered feed.
  #
  # When no feed has been registered there is nothing to listen to, so a
  # warning is logged and the method returns nil without entering the
  # reactor (previously this raised NoMethodError on nil).
  def start
    feeds = @changes || []
    if feeds.empty?
      logger.warn "No changes feeds configured, nothing to start"
      return
    end

    EventMachine.run do
      feeds.each do |feed|
        feed.start
      end
    end
  end

  # Provide some way to handle messages. The logger is created on first
  # use and the same instance is returned on every later call.
  def logger
    @logger ||= prepare_logger
  end

  # Build a new Logger that writes to STDOUT at INFO level.
  def prepare_logger
    log = Logger.new(STDOUT)
    log.level = Logger::INFO
    log
  end

end
48
+
@@ -0,0 +1,41 @@
1
+
2
module CouchTap
  module Builders

    #
    # Builder for a collection: a field of the parent's data that holds a
    # list of items. For every item in that list, each `table` definition
    # found in the block creates one Table builder; #execute then runs
    # all of them.
    #
    class Collection

      attr_reader :parent, :field

      # parent - object whose #data provides the source values.
      # field  - name of the entry in the parent's data to read items from.
      # opts   - accepted but not used here.
      # block  - DSL definition, evaluated in the context of this instance.
      def initialize(parent, field, opts = {}, &block)
        @parent  = parent
        @field   = field
        @_tables = []

        instance_eval(&block)
      end

      # Call #execute on every table builder, in the order they were created.
      def execute
        @_tables.each(&:execute)
      end

      #### DSL Methods

      # Create a Table builder for each item stored under `field` in the
      # parent's data (looked up by string key; a missing entry is treated
      # as an empty list). Each item is handed to the table through the
      # `:data` option, merged on top of the given opts.
      def table(name, opts = {}, &block)
        items = parent.data[field.to_s] || []
        items.each do |entry|
          @_tables << Table.new(parent, name, opts.merge(:data => entry), &block)
        end
      end

    end

  end
end
41
+