metacrunch-db 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Readme.md +86 -0
- data/lib/metacrunch/db/destination.rb +6 -8
- data/lib/metacrunch/db/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d794b9e90321be6c7218516ae260742f3e9fdcbf
|
4
|
+
data.tar.gz: 65b4bbfb94017879c840b95ca8272a0c93beccac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e9682fa4db51b938c16474686b8110849ab7f05483cb70eaf0b753660fb7ea3cc493b6e674f4724e14213cfca7bb03c6d8975baf676f3474ec2c114be7a0245
|
7
|
+
data.tar.gz: 719564bf0c08fe07019316ac71618c94f5c6a1e829763899f92b622a4089cb81d5de69c193f18ee6b52d746dc1eb69e8f2e0276282dd1606b296f912032d071e
|
data/Readme.md
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
metacrunch-db
|
2
|
+
=============
|
3
|
+
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/metacrunch-db.svg)](http://badge.fury.io/rb/metacrunch-db)
|
5
|
+
[![Code Climate](https://codeclimate.com/github/ubpb/metacrunch-db/badges/gpa.svg)](https://codeclimate.com/github/ubpb/metacrunch-db)
|
6
|
+
[![Build Status](https://travis-ci.org/ubpb/metacrunch-db.svg)](https://travis-ci.org/ubpb/metacrunch-db)
|
7
|
+
|
8
|
+
This is the official SQL database package for the [metacrunch ETL toolkit](https://github.com/ubpb/metacrunch). The implementation uses the [Sequel](https://github.com/jeremyevans/sequel) Gem as a dependency. Every database that is supported by Sequel can be used with this package.
|
9
|
+
|
10
|
+
Installation
|
11
|
+
------------
|
12
|
+
|
13
|
+
Include the gem in your `Gemfile`
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem "metacrunch-db", "~> 1.0.0"
|
17
|
+
```
|
18
|
+
|
19
|
+
and run `$ bundle install` to install it.
|
20
|
+
|
21
|
+
Or install it manually
|
22
|
+
|
23
|
+
```
|
24
|
+
$ gem install metacrunch-db
|
25
|
+
```
|
26
|
+
|
27
|
+
|
28
|
+
Usage
|
29
|
+
-----
|
30
|
+
|
31
|
+
*Note: For working examples of how to use this package, check out our [demo repository](https://github.com/ubpb/metacrunch-demo).*
|
32
|
+
|
33
|
+
### Source
|
34
|
+
|
35
|
+
The `Metacrunch::DB::Source` class can be used to read data from SQL databases into a metacrunch job.
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
# my_job.metacrunch
|
39
|
+
|
40
|
+
# Create a Sequel database connection
|
41
|
+
SOURCE_DB = Sequel.connect(...)
|
42
|
+
|
43
|
+
# Create a Sequel dataset with an unambiguous order.
|
44
|
+
my_source_dataset = SOURCE_DB[:my_table].order(:id)
|
45
|
+
|
46
|
+
# Set the source
|
47
|
+
source Metacrunch::DB::Source.new(my_source_dataset [, OPTIONS])
|
48
|
+
```
|
49
|
+
|
50
|
+
The implementation uses Sequel's [`paged_each`](http://www.rubydoc.info/github/jeremyevans/sequel/Sequel%2FDataset%3Apaged_each) to efficiently iterate even over large result sets. You can provide the following options to control how `paged_each` works.
|
51
|
+
|
52
|
+
**Options**
|
53
|
+
|
54
|
+
For a detailed description, consult the Sequel documentation of [`paged_each`](http://www.rubydoc.info/github/jeremyevans/sequel/Sequel%2FDataset%3Apaged_each). Please note that the default for `strategy` has been changed to `:filter`.
|
55
|
+
|
56
|
+
* `:rows_per_fetch`: Defaults to 1000.
|
57
|
+
* `:strategy`: `:offset` or `:filter`. Defaults to `:filter`.
|
58
|
+
* `:filter_values`: Defaults to `nil`.
|
59
|
+
|
60
|
+
|
61
|
+
### Destination
|
62
|
+
|
63
|
+
The `Metacrunch::DB::Destination` class can be used to write data from a metacrunch job to SQL databases.
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
# my_job.metacrunch
|
67
|
+
|
68
|
+
# Create a Sequel database connection
|
69
|
+
DEST_DB = Sequel.connect(...)
|
70
|
+
|
71
|
+
# Create a Sequel dataset where data should be written
|
72
|
+
my_target_dataset = DEST_DB[:my_table]
|
73
|
+
|
74
|
+
# For performance reasons it may be useful to create a batch
|
75
|
+
# of records that gets written to the database
|
76
|
+
transformation ->(row) { row }, batch_size: 1000
|
77
|
+
|
78
|
+
# Set the destination
|
79
|
+
destination Metacrunch::DB::Destination.new(my_target_dataset [, OPTIONS])
|
80
|
+
```
|
81
|
+
|
82
|
+
**Options**
|
83
|
+
|
84
|
+
* `use_upsert`: When set to `true`, it will perform an upsert (update an existing record) instead of an insert. Defaults to `false`.
|
85
|
+
* `primary_key`: The primary key to use to identify an existing record in case of an upsert. It defaults to `:id`.
|
86
|
+
* `transaction_options`: A hash of options to control how the database should handle the transaction. For a complete list of available options, check out the Sequel documentation [here](http://www.rubydoc.info/github/jeremyevans/sequel/Sequel/Database#transaction-instance_method).
|
@@ -6,8 +6,7 @@ module Metacrunch
|
|
6
6
|
DEFAULT_OPTIONS = {
|
7
7
|
use_upsert: false,
|
8
8
|
primary_key: :id,
|
9
|
-
|
10
|
-
num_retries: 5
|
9
|
+
transaction_options: {}
|
11
10
|
}
|
12
11
|
|
13
12
|
def initialize(sequel_dataset, options = {})
|
@@ -16,10 +15,9 @@ module Metacrunch
|
|
16
15
|
end
|
17
16
|
|
18
17
|
def write(data)
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
) do
|
18
|
+
return if data.blank?
|
19
|
+
|
20
|
+
@dataset.db.transaction(@options[:transaction_options]) do
|
23
21
|
if data.is_a?(Array)
|
24
22
|
data.each{|d| insert_or_upsert(d) }
|
25
23
|
else
|
@@ -35,11 +33,11 @@ module Metacrunch
|
|
35
33
|
private
|
36
34
|
|
37
35
|
def insert_or_upsert(data)
|
38
|
-
@use_upsert ? upsert(data) : insert(data)
|
36
|
+
@options[:use_upsert] ? upsert(data) : insert(data)
|
39
37
|
end
|
40
38
|
|
41
39
|
def insert(data)
|
42
|
-
@dataset.insert(data)
|
40
|
+
@dataset.insert(data)
|
43
41
|
end
|
44
42
|
|
45
43
|
def upsert(data)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -49,6 +49,7 @@ files:
|
|
49
49
|
- Gemfile
|
50
50
|
- License.txt
|
51
51
|
- Rakefile
|
52
|
+
- Readme.md
|
52
53
|
- bin/console
|
53
54
|
- lib/metacrunch/db.rb
|
54
55
|
- lib/metacrunch/db/destination.rb
|