couchdb_to_sql 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rubocop.yml +33 -0
- data/.rubocop_todo.yml +39 -0
- data/.ruby-version +1 -0
- data/.travis.yml +12 -0
- data/.vscode/launch.json +46 -0
- data/Gemfile +11 -0
- data/LICENSE +24 -0
- data/README.md +163 -0
- data/Rakefile +28 -0
- data/VERSION +1 -0
- data/couchdb_to_sql.gemspec +32 -0
- data/examples/feed.rb +22 -0
- data/exe/couchdb_to_sql +23 -0
- data/lib/couchdb_to_sql.rb +42 -0
- data/lib/couchdb_to_sql/changes.rb +286 -0
- data/lib/couchdb_to_sql/document_handler.rb +88 -0
- data/lib/couchdb_to_sql/schema.rb +30 -0
- data/lib/couchdb_to_sql/table_builder.rb +112 -0
- data/lib/couchdb_to_sql/table_deleted_marker.rb +49 -0
- data/lib/couchdb_to_sql/table_destroyer.rb +22 -0
- data/lib/couchdb_to_sql/table_operator.rb +36 -0
- data/test/functional/functional_changes_test.rb +36 -0
- data/test/test_helper.rb +30 -0
- data/test/unit/changes_test.rb +129 -0
- data/test/unit/document_handler_test.rb +79 -0
- data/test/unit/schema_test.rb +52 -0
- data/test/unit/table_builder_test.rb +199 -0
- data/test/unit/table_destroyer_test.rb +65 -0
- metadata +233 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5a78893ba22f84ef03fa138cbee46b65ab1e793a
|
4
|
+
data.tar.gz: a57a1d876ff3969ece6ddfca90ba32c8d0d56216
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 04bf0493beadf5d5265df76e5778fccb65f46d571b0b45fa2910f33b6b3eff69261f230f78715aa3dfb88c1980e8f034e1c7448a228b798b057a587b13741753
|
7
|
+
data.tar.gz: ce176aaf1253318c74a97ba6cef7bedf1c62d219b5c49301091f84f6cc796f6e2951315375594836bfbc247913d4c455c3b194ecb8a835fdd1529cff5a70931b
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
AllCops:
|
4
|
+
TargetRubyVersion: 2.3
|
5
|
+
DisplayCopNames: true
|
6
|
+
Exclude:
|
7
|
+
- bin/**
|
8
|
+
|
9
|
+
Layout/MultilineMethodCallIndentation:
|
10
|
+
EnforcedStyle: indented
|
11
|
+
Layout/MultilineOperationIndentation:
|
12
|
+
EnforcedStyle: indented
|
13
|
+
|
14
|
+
Lint/EndAlignment:
|
15
|
+
EnforcedStyleAlignWith: variable
|
16
|
+
|
17
|
+
Metrics/LineLength:
|
18
|
+
Max: 132
|
19
|
+
Metrics/MethodLength:
|
20
|
+
Severity: warning
|
21
|
+
|
22
|
+
Naming/FileName:
|
23
|
+
Enabled: false
|
24
|
+
|
25
|
+
# Rationale: allow Weirich-style blocks
|
26
|
+
Style/BlockDelimiters:
|
27
|
+
Enabled: false
|
28
|
+
Style/Documentation:
|
29
|
+
Enabled: false
|
30
|
+
Style/Encoding:
|
31
|
+
Enabled: false
|
32
|
+
Style/NumericPredicate:
|
33
|
+
Enabled: false
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2017-10-02 12:02:20 +0300 using RuboCop version 0.50.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 8
|
10
|
+
Metrics/AbcSize:
|
11
|
+
Max: 36
|
12
|
+
|
13
|
+
# Offense count: 1
|
14
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
15
|
+
Metrics/BlockLength:
|
16
|
+
Max: 59
|
17
|
+
|
18
|
+
# Offense count: 1
|
19
|
+
# Configuration parameters: CountBlocks.
|
20
|
+
Metrics/BlockNesting:
|
21
|
+
Max: 4
|
22
|
+
|
23
|
+
# Offense count: 2
|
24
|
+
# Configuration parameters: CountComments.
|
25
|
+
Metrics/ClassLength:
|
26
|
+
Max: 202
|
27
|
+
|
28
|
+
# Offense count: 2
|
29
|
+
Metrics/CyclomaticComplexity:
|
30
|
+
Max: 8
|
31
|
+
|
32
|
+
# Offense count: 11
|
33
|
+
# Configuration parameters: CountComments.
|
34
|
+
Metrics/MethodLength:
|
35
|
+
Max: 31
|
36
|
+
|
37
|
+
# Offense count: 1
|
38
|
+
Metrics/PerceivedComplexity:
|
39
|
+
Max: 10
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4.2
|
data/.travis.yml
ADDED
data/.vscode/launch.json
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
{
|
2
|
+
"version": "0.2.0",
|
3
|
+
"configurations": [
|
4
|
+
{
|
5
|
+
"name": "test-unit - run all tests",
|
6
|
+
"type": "Ruby",
|
7
|
+
"request": "launch",
|
8
|
+
"cwd": "${workspaceRoot}",
|
9
|
+
"program": "${workspaceRoot}/bin/rake",
|
10
|
+
"args": [
|
11
|
+
"test"
|
12
|
+
],
|
13
|
+
"env": {
|
14
|
+
"TEST_SQL_URL": "postgres://localhost/couchdb_to_sql_test",
|
15
|
+
"COUCHDB_URL": "http://admin:admin@127.0.0.1:5984/"
|
16
|
+
}
|
17
|
+
},
|
18
|
+
{
|
19
|
+
"name": "test-unit - active spec file only",
|
20
|
+
"type": "Ruby",
|
21
|
+
"request": "launch",
|
22
|
+
"cwd": "${workspaceRoot}",
|
23
|
+
"program": "${workspaceRoot}/bin/rake",
|
24
|
+
"args": [
|
25
|
+
"test"
|
26
|
+
],
|
27
|
+
"env": {
|
28
|
+
"TEST": "${file}",
|
29
|
+
"TEST_SQL_URL": "postgres://localhost/couchdb_to_sql_test",
|
30
|
+
"COUCHDB_URL": "http://admin:admin@127.0.0.1:5984/"
|
31
|
+
}
|
32
|
+
},
|
33
|
+
{
|
34
|
+
"name": "run with test_tap.rb",
|
35
|
+
"type": "Ruby",
|
36
|
+
"request": "launch",
|
37
|
+
"cwd": "${workspaceRoot}",
|
38
|
+
"program": "${workspaceRoot}/bin/bundler",
|
39
|
+
"args": [
|
40
|
+
"exec",
|
41
|
+
"${workspaceRoot}/exe/couchdb_to_sql",
|
42
|
+
"test_tap.rb"
|
43
|
+
]
|
44
|
+
}
|
45
|
+
]
|
46
|
+
}
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright © Sam Lown 2013-2016
|
4
|
+
Copyright © eCraft Sverige AB 2017
|
5
|
+
|
6
|
+
All rights reserved.
|
7
|
+
|
8
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
|
+
of this software and associated documentation files (the "Software"), to deal
|
10
|
+
in the Software without restriction, including without limitation the rights
|
11
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12
|
+
copies of the Software, and to permit persons to whom the Software is
|
13
|
+
furnished to do so, subject to the following conditions:
|
14
|
+
|
15
|
+
The above copyright notice and this permission notice shall be included in all
|
16
|
+
copies or substantial portions of the Software.
|
17
|
+
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
24
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/ecraft/couch_tap.svg?branch=master)](https://travis-ci.org/ecraft/couch_tap)
|
2
|
+
|
3
|
+
# couchdb_to_sql
|
4
|
+
|
5
|
+
Utility to listen to a CouchDB changes feed and automatically insert, update,
|
6
|
+
or delete rows in an SQL database based on matching key-value conditions of incoming documents.
|
7
|
+
|
8
|
+
`couchdb_to_sql` is heavily indebted to [samlown's](https://github.com/samlown) original [couch_tap](https://github.com/samlown/couch_tap) gem. We have added functionality needed for our particular use case, while still trying to keep it reasonably flexible and not too hardwired to the `ember-pouch` use case.
|
9
|
+
|
10
|
+
While CouchDB is awesome, business people probably won't be quite as impressed when they want to play around with the data. Regular SQL is generally accepted as being easy to use and much more widely supported by a larger range of commercial tools.
|
11
|
+
|
12
|
+
`couchdb_to_sql` will listen to incoming documents on a CouchDB server's [_changes feed](http://docs.couchdb.org/en/2.1.0/api/database/changes.html) in continuous mode, and automatically update rows of the SQL database tables defined in the conversion schema. The changes feed uses a sequence number allowing synchronization to be started and stopped at will.
|
13
|
+
|
14
|
+
[Sequel](http://sequel.jeremyevans.net/) is used to provide the connection to the database. This library can also be used for migrations, which is important for frequently changing schemas.
|
15
|
+
|
16
|
+
`couchdb_to_sql` takes a simple two-step approach to converting documents to rows. When a change event is received for a matching `document` definition, each associated row is completely deleted. If the change is anything other than a delete event, the rows will be re-created with the new data. This makes things much easier when trying to deal with multi-level documents (i.e. documents of documents) and one-to-many table relationships.
|
17
|
+
|
18
|
+
|
19
|
+
## A `couchdb_to_sql` Project
|
20
|
+
|
21
|
+
`couchdb_to_sql` requires a configuration or filter definition that will allow incoming document changes to be identified and dealt with. The configuration file can either be hand-written or generated dynamically. (For our particular use case with `ember-pouch`, we have chosen to generate it based on the Ember model metadata. The script for this is unfortunately not open source at this time.)
|
22
|
+
|
23
|
+
The following example attempts to outline most of the key features of the DSL.
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
# The couchdb database from which to request the changes feed
|
27
|
+
changes "http://user:pass@host:port/invoicing" do
|
28
|
+
# # Optional flag which can be enabled to take advantage of PostgreSQL 9.5's support for INSERT ... ON CONFLICT, i.e. upserts.
|
29
|
+
# # Note: this only deals with the _couchdb_to_sql_sequences metadata table, not the actual CouchDB documents themselves.
|
30
|
+
# upsert_mode
|
31
|
+
|
32
|
+
# # Optional flag which can be enabled if ember-pouch is being used to populate the CouchDB database. ember-pouch uses a
|
33
|
+
# # specially crafted format of the CouchDB documents, where all the data is placed in a 'data' node and the 'id' follows a
|
34
|
+
# # particular format. This flag makes couchdb_to_sql presume that all CouchDB documents for the given stream follow this format.
|
35
|
+
# ember_pouch_mode
|
36
|
+
|
37
|
+
# # Optional flag which can be enabled to enable a stricter mode, where processing will abort if an unhandled document is
|
38
|
+
# # encountered.
|
39
|
+
# fail_on_unhandled_document
|
40
|
+
|
41
|
+
# # Optional path to a file containing a JSON array of sequences to skip. The 'seq' value of incoming documents will be compared
|
42
|
+
# # to the values in this array.
|
43
|
+
# skip_seqs_file 'skiplist.json'
|
44
|
+
|
45
|
+
# The target database to which changes will be streamed.
|
46
|
+
database "postgres://user:pass@localhost:5432/invoicing"
|
47
|
+
|
48
|
+
# Simple automated copy, each property's value in the matching CouchDB document will be copied to the table field with the
|
49
|
+
# same name.
|
50
|
+
document 'type' => 'User' do
|
51
|
+
table :users
|
52
|
+
end
|
53
|
+
|
54
|
+
document 'type' => 'Invoice' do
|
55
|
+
|
56
|
+
table :invoices, :key => :invoice_id do
|
57
|
+
|
58
|
+
# Copy columns from fields with different name
|
59
|
+
column :updated_at, :updated_on
|
60
|
+
column :created_at, :created_on
|
61
|
+
|
62
|
+
# Manually set a value from document or fixed variable
|
63
|
+
column :date, doc['date'].to_json
|
64
|
+
column :added_at, Time.now
|
65
|
+
|
66
|
+
# Set column values from a block.
|
67
|
+
column :total do
|
68
|
+
doc['items'].inject(0){ |sum,item| sum + item['total'] }
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
```
|
74
|
+
|
75
|
+
## DSL Summary
|
76
|
+
|
77
|
+
### changes
|
78
|
+
|
79
|
+
Defines which CouchDB database should be used to request the changes feed.
|
80
|
+
|
81
|
+
After loading the rest of the configuration, the service will connect to the database using Event Machine. As new changes come into the system, they will be managed in the background.
|
82
|
+
|
83
|
+
|
84
|
+
### database
|
85
|
+
|
86
|
+
The Sequel URL used to connect to the destination database. Behind the scenes, `couchdb_to_sql` will check for a metadata table named `_couchdb_to_sql_sequences` that contains a single row for the current changes sequence id, much like a migration id typically seen in a Rails database.
|
87
|
+
|
88
|
+
As changes are received from CouchDB, the current sequence will be updated to match.
|
89
|
+
|
90
|
+
#### document
|
91
|
+
|
92
|
+
When a document is received from the changes feed, it will be passed through each
|
93
|
+
`document` stanza looking for a match. Take the following example:
|
94
|
+
|
95
|
+
document :type => 'Invoice' do |doc|
|
96
|
+
# ...
|
97
|
+
end
|
98
|
+
|
99
|
+
This will match all documents whose `type` property is equal to "Invoice". The document itself will be made available as a hash through the `doc` block variable.
|
100
|
+
|
101
|
+
`document` stanzas may be nested if required to provide further levels of filtering.
|
102
|
+
|
103
|
+
#### table
|
104
|
+
|
105
|
+
Each `table` stanza lets `couchdb_to_sql` know that all or part of the current document should be inserted into it. By default, the matching table's schema will be read and any field names that match a property in the top-level of the document will be inserted automatically.
|
106
|
+
|
107
|
+
One of the limitations of `couchdb_to_sql` is that all tables must have an id field as their primary key. In each row, the id's value will be copied from the `_id` of the document being imported. This is the only way that deleted documents can be reliably found and removed from the relational database.
|
108
|
+
|
109
|
+
#### column
|
110
|
+
|
111
|
+
#### foreign_key
|
112
|
+
|
113
|
+
|
114
|
+
### Notes on deleted documents
|
115
|
+
|
116
|
+
When a CouchDB document is deleted, the corresponding rows are not deleted from the SQL database, because this is typically not what you want from a data integrity point of view. Instead, the rows are marked as deleted.
|
117
|
+
|
118
|
+
For this to work, the following two columns must exist in the table (example given is from PostgreSQL):
|
119
|
+
|
120
|
+
```
|
121
|
+
fieldops_reports=# \d spare_parts
|
122
|
+
Table "public.spare_parts"
|
123
|
+
Column | Type | Modifiers
|
124
|
+
--------------------+--------------------------+------------------------
|
125
|
+
spare_part_id | text | not null
|
126
|
+
id | text |
|
127
|
+
_deleted | boolean | not null default false
|
128
|
+
_deleted_timestamp | timestamp with time zone |
|
129
|
+
```
|
130
|
+
|
131
|
+
(`spare_part_id` is the primary key which will hold the CouchDB id. `id` holds the "Ember ID" in case you are using `ember-pouch` mode. `_deleted*` are the fields which indicate if the record is deleted, and if so, when it was marked as deleted.)
|
132
|
+
|
133
|
+
## Testing
|
134
|
+
|
135
|
+
Run tests using rake, or individual tests as follows:
|
136
|
+
|
137
|
+
```shell
|
138
|
+
$ rake test TEST=test/unit/changes_test.rb
|
139
|
+
```
|
140
|
+
|
141
|
+
If you have disabled the "admin party" in CouchDB, you might have to specify the CouchDB URL manually, like this:
|
142
|
+
|
143
|
+
```shell
|
144
|
+
$ COUCHDB_URL='http://admin:admin@127.0.0.1:5984/' bundle exec rake test
|
145
|
+
```
|
146
|
+
|
147
|
+
If you want to run the tests against a PostgreSQL database instead of the default SQLite database:
|
148
|
+
|
149
|
+
```shell
|
150
|
+
$ TEST_SQL_URL='postgres://localhost/couchdb_to_sql_test' bundle exec rake test
|
151
|
+
```
|
152
|
+
|
153
|
+
## Useful environment variables
|
154
|
+
|
155
|
+
- `SEQUEL_LOG_LEVEL=debug` - set to enable logging of all SQL queries executed.
|
156
|
+
|
157
|
+
## Releasing a new version
|
158
|
+
|
159
|
+
- Merge all relevant pull requests
|
160
|
+
- Bump the version in the `VERSION` file. Follow Semantic Versioning principles. Do not prepend the version with a v. You don't need to commit or push after this step; that is done automatically by the next step.
|
161
|
+
- `git release v1.0.x` (`brew install git-extras` if you are missing the `git release` command.)
|
162
|
+
- `bundle exec rake build release` (builds the `.gem` file and pushes it to Rubygems.org)
|
163
|
+
- `changelog-rs --latest` to regenerate the changelog which can then be copy-pasted to the [releases page](https://github.com/ecraft/couchdb_to_sql/releases). `curl https://sh.rustup.rs -sSf | sh && cargo install changelog-rs` if you don't have it installed. More info on [its web page](https://github.com/perlun/changelog-rs).
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bundler'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'rake/testtask'
|
6
|
+
require 'rubocop/rake_task'
|
7
|
+
|
8
|
+
Bundler::GemHelper.install_tasks
|
9
|
+
RuboCop::RakeTask.new
|
10
|
+
|
11
|
+
Rake::TestTask.new do |t|
|
12
|
+
t.libs << 'test'
|
13
|
+
t.test_files = FileList.new('test/unit/**/*.rb')
|
14
|
+
|
15
|
+
# Without this setting, the unit test running generates a load of warnings in unrelated/3rd party gems, which obscures the
|
16
|
+
# real output of the test runs and makes it harder to read.
|
17
|
+
t.warning = false
|
18
|
+
end
|
19
|
+
|
20
|
+
# The tests are unfortunately at the moment MRI only, because of an Sqlite dependency:
|
21
|
+
# https://github.com/ecraft/couchdb_to_sql/issues/9
|
22
|
+
if defined?(JRUBY_VERSION)
|
23
|
+
desc 'Runs Rubocop linting'
|
24
|
+
task default: :rubocop
|
25
|
+
else
|
26
|
+
desc 'Run Rubocop linting and the unit tests'
|
27
|
+
task default: %i[rubocop test]
|
28
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = 'couchdb_to_sql'
|
5
|
+
s.version = `cat VERSION`.strip
|
6
|
+
s.date = File.mtime('VERSION')
|
7
|
+
s.summary = 'Listen to a CouchDB changes feed and create rows in a relational database in real-time.'
|
8
|
+
s.description = "couchdb_to_sql provides a DSL that allows complex CouchDB documents to be converted into rows in a RDBMS' " \
|
9
|
+
'table. The stream of events received from the CouchDB changes feed will trigger documents to be fed into a ' \
|
10
|
+
'matching filter block and saved in the database.'
|
11
|
+
s.authors = ['Sam Lown', 'Per Lundberg', 'Jens Nockert', 'Andreas Finne']
|
12
|
+
s.homepage = 'https://github.com/ecraft/couchdb_to_sql'
|
13
|
+
s.license = 'MIT'
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.bindir = 'exe'
|
18
|
+
s.executables = `git ls-files -- exe/*`.split("\n").map { |f| File.basename(f) }
|
19
|
+
s.require_paths = ['lib']
|
20
|
+
|
21
|
+
s.add_dependency 'activesupport', '~> 5.0'
|
22
|
+
s.add_dependency 'couchrest', '~> 2.0'
|
23
|
+
s.add_dependency 'httpclient', '~> 2.6'
|
24
|
+
s.add_dependency 'logging_library', '~> 1.0', '>= 1.0.5'
|
25
|
+
s.add_dependency 'sequel', '>= 4.36.0'
|
26
|
+
|
27
|
+
s.add_development_dependency 'mocha'
|
28
|
+
s.add_development_dependency 'rake', '~> 12.0'
|
29
|
+
s.add_development_dependency 'rubocop'
|
30
|
+
s.add_development_dependency 'simplecov', '~> 0.15'
|
31
|
+
s.add_development_dependency 'test-unit', '~> 3.2'
|
32
|
+
end
|
data/examples/feed.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
# Sample Configuration Script
|
5
|
+
#
|
6
|
+
# Run using the command line application:
|
7
|
+
#
|
8
|
+
# couchdb_to_sql feed.rb
|
9
|
+
#
|
10
|
+
|
11
|
+
changes 'http://user:pass@host:port/invoicing' do
|
12
|
+
# Which database should we connect to?
|
13
|
+
database 'sqlite:///database.sqlite3'
|
14
|
+
|
15
|
+
filter 'type' => 'User' do
|
16
|
+
table :users
|
17
|
+
end
|
18
|
+
|
19
|
+
filter 'type' => 'Journey' do
|
20
|
+
table :journeys
|
21
|
+
end
|
22
|
+
end
|