couch_tap 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2667fb312598a6e1960dc06f91b4feec38d2419e
4
+ data.tar.gz: c45097c89984d980fb67d4996c468cabf28f6fae
5
+ SHA512:
6
+ metadata.gz: 01ebd6988be11f2ee4d0ca1d4cbbb42d10dcbcd5b7457363da2c11167ab9eae8e468a98c70288517e4477f83b2dd89df3db96405ce2eca0f6a3e871b76c0be59
7
+ data.tar.gz: e4054c2b4447ffc355e3ca893b708bb7b9ea28a0e4b12da68bd23b628a22d3e3eae3d4728a2b2ac2df276069c271f2cb423be58a66df064ccbd75dd77d243549
@@ -0,0 +1,13 @@
1
+ .bundle
2
+ .sass-cache/
3
+ *.sw[opn]
4
+ .rvmrc
5
+ .DS_Store
6
+ *.zip
7
+ *.*~
8
+ .rsync_cache
9
+ zeus.json
10
+ output
11
+ tmp
12
+ crash.log
13
+ pkg
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+
2
+ source 'https://rubygems.org'
3
+ gemspec
4
+
@@ -0,0 +1,59 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ couch_tap (0.0.2)
5
+ activesupport (>= 3.0.0)
6
+ couchrest (~> 1.1.3)
7
+ em-http-request (~> 1.0.3)
8
+ sequel (>= 3.45.0)
9
+ yajl-ruby (~> 1.1.0)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ activesupport (4.0.2)
15
+ i18n (~> 0.6, >= 0.6.4)
16
+ minitest (~> 4.2)
17
+ multi_json (~> 1.3)
18
+ thread_safe (~> 0.1)
19
+ tzinfo (~> 0.3.37)
20
+ addressable (2.3.6)
21
+ atomic (1.1.14)
22
+ cookiejar (0.3.2)
23
+ couchrest (1.1.3)
24
+ mime-types (~> 1.15)
25
+ multi_json (~> 1.0)
26
+ rest-client (~> 1.6.1)
27
+ em-http-request (1.0.3)
28
+ addressable (>= 2.2.3)
29
+ cookiejar
30
+ em-socksify
31
+ eventmachine (>= 1.0.0.beta.4)
32
+ http_parser.rb (>= 0.5.3)
33
+ em-socksify (0.3.0)
34
+ eventmachine (>= 1.0.0.beta.4)
35
+ eventmachine (1.0.3)
36
+ http_parser.rb (0.6.0)
37
+ i18n (0.6.9)
38
+ metaclass (0.0.1)
39
+ mime-types (1.25.1)
40
+ minitest (4.7.5)
41
+ mocha (0.13.3)
42
+ metaclass (~> 0.0.1)
43
+ multi_json (1.9.2)
44
+ rest-client (1.6.7)
45
+ mime-types (>= 1.16)
46
+ sequel (4.9.0)
47
+ sqlite3 (1.3.7)
48
+ thread_safe (0.1.3)
49
+ atomic
50
+ tzinfo (0.3.38)
51
+ yajl-ruby (1.1.0)
52
+
53
+ PLATFORMS
54
+ ruby
55
+
56
+ DEPENDENCIES
57
+ couch_tap!
58
+ mocha
59
+ sqlite3
@@ -0,0 +1,183 @@
1
+
2
+ # Couch Tap
3
+
4
+ Utility to listen to a CouchDB changes feed and automatically insert, update,
5
+ or delete rows into a relational database from matching key-value conditions of incoming documents.
6
+
7
+ While CouchDB is awesome, business people probably won't be
8
+ quite as impressed when they want to play around with the data. Regular SQL
9
+ is generally accepted as being easy to use and much more widely supported by a larger
10
+ range of commercial tools.
11
+
12
+ Couch Tap will listen to incoming documents on a CouchDB's changes
13
+ stream and automatically update rows of RDBMS tables defined in the
14
+ conversion schema. The changes stream uses a sequence number allowing
15
+ synchronisation to be started and stopped at will.
16
+
17
+ Ruby's fast and simple [sequel](http://sequel.jeremyevans.net/) library is used to provide the connection to the
18
+ database. This library can also be used for migrations, important for frequently changing schemas.
19
+
20
+ Couch tap takes a simple two-step approach converting documents to rows. When a change event is received
21
+ for a matching `document` definition, each associated row is completely deleted. If the change
22
+ is anything other than a delete event, the rows will be re-created with the new data.
23
+ This makes things much easier when trying to deal with multi-level documents (i.e. documents of documents)
24
+ and one-to-many table relationships.
25
+
26
+
27
+ ## A Couch Tap Project
28
+
29
+ Couch Tap requires a configuration or filter definition that will allow incoming
30
+ document changes to be identified and dealt with.
31
+
32
+ The following example attempts to outline most of the key features of the DSL.
33
+
34
+ ```ruby
35
+ # The couchdb database from which to request the changes feed
36
+ changes "http://user:pass@host:port/invoicing" do
37
+
38
+ # Which database should we connect to?
39
+ database "postgres://user:pass@localhost:5432/invoicing"
40
+
41
+ # Simple automated copy, each property's value in the matching CouchDB
42
+ # document will be copied to the table field with the same name.
43
+ document 'type' => 'User' do
44
+ table :users
45
+ end
46
+
47
+ document 'type' => 'Invoice' do
48
+
49
+ table :invoices, :key => :invoice_id do
50
+
51
+ # Copy columns from fields with different name
52
+ column :updated_at, :updated_on
53
+ column :created_at, :created_on
54
+
55
+ # Manually set a value from document or fixed variable
56
+ column :date, doc['date'].to_json
57
+ column :added_at, Time.now
58
+
59
+ # Set column values from a block.
60
+ column :total do
61
+ doc['items'].inject(0){ |sum,item| sum + item['total'] }
62
+ end
63
+
64
+ # Collections perform special synchronization in order to deal with
65
+ # one to many, or indeed many to many relationships.
66
+ #
67
+ # Rather than attempting a complex synchronisation process, the current
68
+ # version of Couch Tap will just DELETE all current entries with a
69
+ # primary key id that matches that of the parent table.
70
+ #
71
+ # The foreign id key is assumed to be the name of the parent
72
+ # table in singular form with `_id` appended.
73
+ #
74
+ # Each item provided in the array will be made available in the
75
+ # `#data` method, and index from `#index`.
76
+ # `#document` continues to be the complete source document.
77
+ #
78
+ # Collections can be nested to create highly complex structures.
79
+ #
80
+ collection :groups do
81
+ table :invoice_groups do
82
+
83
+ collection :entries do
84
+ table :invoice_entries, :key => :entry_id do
85
+ column :date, data['date']
86
+ column :updated_at, document['updated_at']
87
+ end
88
+ end
89
+
90
+ end
91
+ end
92
+
93
+ # Collections can also be used on Many to Many relationships.
94
+ collection :label_ids do
95
+ table :invoice_labels do
96
+ column :label_id, data
97
+ end
98
+ end
99
+
100
+ end
101
+
102
+ end
103
+ end
104
+ ```
105
+
106
+ ## DSL Summary
107
+
108
+ ### changes
109
+
110
+ Defines which CouchDB database should be used to request the changes feed.
111
+
112
+ After loading the rest of the configuration, the service will
113
+ connect to the database using Event Machine. As new changes come into the
114
+ system, they will be managed in the background.
115
+
116
+
117
+ ### database
118
+
119
+ The Sequel URL used to connect to the destination database. Behind the scenes,
120
+ Couch Tap will check for a table named `couchdb_sequence` that contains a single
121
+ row for the current changes sequence id, much like a migration id typically
122
+ seen in a Rails database.
123
+
124
+ As changes are received from CouchDB, the current sequence will be updated to
125
+ match.
126
+
127
+ #### document
128
+
129
+ When a document is received from the changes feed, it will be passed through each
130
+ `document` stanza looking for a match. Take the following example:
131
+
132
+ document :type => 'Invoice' do |doc|
133
+ # ...
134
+ end
135
+
136
+ This will match all documents whose `type` property is equal to "Invoice". The
137
+ document itself will be made available as a hash through the `doc` block variable.
138
+
139
+ `document` stanzas may be nested if required to provide further levels of
140
+ filtering.
141
+
142
+ #### table
143
+
144
+ Each `table` stanza lets Couch Tap know that all or part of the current document
145
+ should be inserted into it. By default, the matching table's schema will be read
146
+ and any field names that match a property in the top-level of the document will
147
+ be inserted automatically.
148
+
149
+ One of the limitations of Couch Tap is that all tables must have an id field as their
150
+ primary key. In each row, the id's value will be copied from the `_id` of the
151
+ document being imported. This is the only way that deleted documents can be
152
+ reliably found and removed from the relational database.
153
+
154
+ #### column
155
+
156
+ #### collection
157
+
158
+ #### foreign_key
159
+
160
+
161
+ ### Notes on deleted documents
162
+
163
+ Synchronising a deleted document is generally a much more complicated operation.
164
+ Given that the original document no longer exists in the CouchDB database,
165
+ there is no way to know which document group and table the document was inserted
166
+ into.
167
+
168
+ To get around this issue, Couch Tap will search through all the tables defined
169
+ for the database and delete rows that match the primary or foreign keys.
170
+
171
+ Obviously, this is very inefficient. Fortunately, CouchDB is not really suited
172
+ to systems that require lots of document deletion, so hopefully this won't be
173
+ too much of a problem.
174
+
175
+
176
+ ## Testing
177
+
178
+ Run tests using rake, or individual tests as follows:
179
+
180
+ rake test TEST=test/unit/changes_test.rb
181
+
182
+
183
+
@@ -0,0 +1,14 @@
1
+
2
require 'bundler'
require 'rubygems'
require 'rake/testtask'

# Register the gem packaging tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# Test task: puts the `test` directory on the load path and runs every Ruby
# file found under test/unit.
Rake::TestTask.new do |test_task|
  test_task.libs.push('test')
  test_task.test_files = FileList.new('test/unit/**/*.rb')
end

# Running plain `rake` runs the test task.
desc "Run tests"
task :default => [:test]
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.2
@@ -0,0 +1,13 @@
1
#!/usr/bin/env ruby

require 'rubygems'
require 'couch_tap'

# Command line entry point.
#
# Usage: couch_tap CONFIG_FILE
#
# CONFIG_FILE is a Ruby script written with the Couch Tap DSL. It is evaluated
# in the context of the CouchTap module, so it can run arbitrary Ruby code:
# only load configuration files you trust.

config_path = ARGV[0]

# Fail early with a readable message instead of a TypeError from File when
# no configuration file was given.
abort "Usage: #{File.basename($PROGRAM_NAME)} CONFIG_FILE" if config_path.nil?

# Take in the arguments for the configuration file and try to run it
CouchTap.logger.info "Reading configuration: #{config_path}"

# File.read opens, reads and closes the file in one call, so no file handle
# is left open. Passing the path as the second argument makes errors raised
# by the configuration report its file name and line number.
CouchTap.module_eval(File.read(config_path), config_path)

# With the configuration loaded, start her up!
CouchTap.start
13
+
@@ -0,0 +1,22 @@
1
# Gem specification for couch_tap.
#
# The version number lives in the VERSION file stored next to this gemspec;
# the gem date is taken from that file's modification time.
Gem::Specification.new do |s|
  # Resolve VERSION relative to this file and read it with plain Ruby rather
  # than shelling out to `cat`, so loading the gemspec does not depend on a
  # Unix shell or on the VERSION lookup happening from the current directory.
  version_file = File.expand_path('../VERSION', __FILE__)

  s.name = "couch_tap"
  s.version = File.read(version_file).strip
  s.date = File.mtime(version_file)
  s.summary = "Listen to a CouchDB changes feed and create rows in a relational database in real-time."
  s.description = "Couch Tap provides a DSL that allows complex CouchDB documents to be converted into rows in a RDBMS' table. The stream of events received from the CouchDB changes feed will trigger documents to be fed into a matching filter block and saved in the database."
  s.authors = ["Sam Lown"]
  s.email = 'me@samlown.com'

  # File lists come from git, so only tracked files are packaged.
  # NOTE(review): these commands still run in the current working directory.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency "couchrest", "~> 1.1.3"
  s.add_dependency "em-http-request", "~> 1.0.3"
  s.add_dependency "yajl-ruby", "~> 1.1.0"
  s.add_dependency "sequel", ">= 3.45.0"
  s.add_dependency "activesupport", ">= 3.0.0"

  # Development-only dependencies
  s.add_development_dependency "mocha"
  s.add_development_dependency "sqlite3"
end
@@ -0,0 +1,27 @@
1
+
2
+ # Sample Configuration Script
3
+ #
4
+ # Run using the command line application:
5
+ #
6
+ # couch_tap feed.rb
7
+ #
8
+
9
+
10
# Listen to the changes feed of the given CouchDB database.
changes "http://user:pass@host:port/invoicing" do

  # Which database should we connect to?
  database "sqlite:///database.sqlite3"

  # NOTE(review): the README describes `document` as the stanza used to match
  # incoming documents and never mentions `filter`, so this sample uses
  # `document` to agree with it. Confirm against the Changes DSL.
  document 'type' => 'User' do
    table :users
  end

  document 'type' => 'Journey' do
    table :journeys
  end

end
25
+
26
+
27
+
@@ -0,0 +1,48 @@
1
+
2
+ # Low level requirements
3
+ require 'sequel'
4
+ require 'couchrest'
5
+ require 'em-http'
6
+ require 'yajl'
7
+ require 'logger'
8
+ require 'active_support/inflector'
9
+ require 'active_support/core_ext/object/blank'
10
+
11
+ # Our stuff
12
+ require 'couch_tap/changes'
13
+ require 'couch_tap/schema'
14
+ require 'couch_tap/document_handler'
15
+ require 'couch_tap/builders/collection'
16
+ require 'couch_tap/builders/table'
17
+ require 'couch_tap/destroyers/collection'
18
+ require 'couch_tap/destroyers/table'
19
+
20
+
21
# Top-level namespace for Couch Tap. The module extends itself, so every
# method below is callable directly on the module (e.g. `CouchTap.start`).
module CouchTap
  extend self

  # Register a changes feed.
  #
  # database - passed through unchanged to Changes.new.
  # block    - passed through unchanged to Changes.new.
  #
  # Returns the list of all feeds registered so far.
  def changes(database, &block)
    (@changes ||= []) << Changes.new(database, &block)
  end

  # Start every registered changes feed inside an EventMachine reactor.
  #
  # When no feed has been registered there is nothing to run: a warning is
  # logged and nil is returned, rather than failing with a NoMethodError on
  # nil from inside the reactor.
  def start
    if (@changes || []).empty?
      logger.warn "No changes feeds have been configured, nothing to start"
      return
    end

    EventMachine.run do
      @changes.each(&:start)
    end
  end

  # Provide some way to handle messages. The logger is built on first use and
  # reused afterwards.
  def logger
    @logger ||= prepare_logger
  end

  # Build the default logger: writes to STDOUT at INFO level.
  def prepare_logger
    log = Logger.new(STDOUT)
    log.level = Logger::INFO
    log
  end

end
48
+
@@ -0,0 +1,41 @@
1
+
2
module CouchTap

  module Builders

    #
    # Collection Builder. Go through each sub-table definition and recursively
    # prepare the data ready to be inserted into the database.
    #
    class Collection

      attr_reader :parent, :field

      # parent - object whose `data` holds the collection items; also handed
      #          to every Table that gets built.
      # field  - name of the entry in the parent's data that holds the items.
      # opts   - accepted for interface compatibility, currently unused.
      # block  - optional DSL block, evaluated in the context of this
      #          collection so that it can call `table`.
      def initialize(parent, field, opts = {}, &block)
        @_tables = []
        @parent = parent
        @field = field

        # instance_eval raises an ArgumentError when handed no block, so a
        # collection defined without a body is simply left empty.
        instance_eval(&block) if block_given?
      end

      # Execute every table builder prepared for this collection.
      def execute
        @_tables.each(&:execute)
      end

      #### DSL Methods

      # Prepare one Table builder per item found in the parent's data under
      # `field`. Each item is handed to its table through the :data option,
      # merged into the provided opts. A missing field yields no tables.
      def table(name, opts = {}, &block)
        source = parent.data[field.to_s] || []
        source.each do |item|
          options = opts.merge(:data => item)
          @_tables << Table.new(parent, name, options, &block)
        end
      end

    end
  end
end
41
+