mkpipe-loader-postgres 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mkpipe_loader_postgres-0.5.0/mkpipe_loader_postgres.egg-info → mkpipe_loader_postgres-0.6.0}/PKG-INFO +32 -1
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/README.md +31 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres/__init__.py +1 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0/mkpipe_loader_postgres.egg-info}/PKG-INFO +32 -1
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/setup.py +1 -1
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/LICENSE +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/MANIFEST.in +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres/jar_paths.py +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres/jars/.gitkeep +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres.egg-info/SOURCES.txt +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres.egg-info/dependency_links.txt +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres.egg-info/entry_points.txt +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres.egg-info/requires.txt +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres.egg-info/top_level.txt +0 -0
- {mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkpipe-loader-postgres
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: PostgreSQL loader for mkpipe.
|
|
5
5
|
Author: Metin Karakus
|
|
6
6
|
Author-email: metin_karakus@yahoo.com
|
|
@@ -64,8 +64,37 @@ pipelines:
|
|
|
64
64
|
target_name: public.stg_table
|
|
65
65
|
replication_method: full
|
|
66
66
|
batchsize: 10000
|
|
67
|
+
|
|
68
|
+
- name: source_table
|
|
69
|
+
target_name: public.stg_table
|
|
70
|
+
replication_method: incremental
|
|
71
|
+
iterate_column: updated_at
|
|
72
|
+
write_strategy: upsert
|
|
73
|
+
write_key: [id]
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Write Strategy
|
|
79
|
+
|
|
80
|
+
Control how data is written to PostgreSQL:
|
|
81
|
+
|
|
82
|
+
```yaml
|
|
83
|
+
- name: source_table
|
|
84
|
+
target_name: public.stg_table
|
|
85
|
+
write_strategy: upsert # append | replace | upsert | merge
|
|
86
|
+
write_key: [id] # required for upsert/merge
|
|
67
87
|
```
|
|
68
88
|
|
|
89
|
+
| Strategy | PostgreSQL Behavior |
|
|
90
|
+
|---|---|
|
|
91
|
+
| `append` | Plain `INSERT` via JDBC (default for incremental) |
|
|
92
|
+
| `replace` | Drop and recreate table, then insert (default for full) |
|
|
93
|
+
| `upsert` | `INSERT ... ON CONFLICT (write_key) DO UPDATE` via temp table |
|
|
94
|
+
| `merge` | Same as upsert for PostgreSQL |
|
|
95
|
+
|
|
96
|
+
> **Note:** `upsert`/`merge` requires `write_key`. The loader writes to a temp table first, then executes a single `INSERT ... ON CONFLICT` statement to merge into the target.
|
|
97
|
+
|
|
69
98
|
---
|
|
70
99
|
|
|
71
100
|
## Write Parallelism & Throughput
|
|
@@ -102,6 +131,8 @@ Two parameters control write performance:
|
|
|
102
131
|
| `replication_method` | `full` / `incremental` | `full` | Replication strategy |
|
|
103
132
|
| `batchsize` | int | `10000` | Rows per JDBC batch insert |
|
|
104
133
|
| `write_partitions` | int | — | Coalesce DataFrame to N partitions before writing |
|
|
134
|
+
| `write_strategy` | string | — | `append`, `replace`, `upsert`, `merge` |
|
|
135
|
+
| `write_key` | list | — | Key columns for upsert/merge (required) |
|
|
105
136
|
| `dedup_columns` | list | — | Columns used for `mkpipe_id` hash deduplication |
|
|
106
137
|
| `tags` | list | `[]` | Tags for selective pipeline execution |
|
|
107
138
|
| `pass_on_error` | bool | `false` | Skip table on error instead of failing |
|
|
@@ -40,8 +40,37 @@ pipelines:
|
|
|
40
40
|
target_name: public.stg_table
|
|
41
41
|
replication_method: full
|
|
42
42
|
batchsize: 10000
|
|
43
|
+
|
|
44
|
+
- name: source_table
|
|
45
|
+
target_name: public.stg_table
|
|
46
|
+
replication_method: incremental
|
|
47
|
+
iterate_column: updated_at
|
|
48
|
+
write_strategy: upsert
|
|
49
|
+
write_key: [id]
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Write Strategy
|
|
55
|
+
|
|
56
|
+
Control how data is written to PostgreSQL:
|
|
57
|
+
|
|
58
|
+
```yaml
|
|
59
|
+
- name: source_table
|
|
60
|
+
target_name: public.stg_table
|
|
61
|
+
write_strategy: upsert # append | replace | upsert | merge
|
|
62
|
+
write_key: [id] # required for upsert/merge
|
|
43
63
|
```
|
|
44
64
|
|
|
65
|
+
| Strategy | PostgreSQL Behavior |
|
|
66
|
+
|---|---|
|
|
67
|
+
| `append` | Plain `INSERT` via JDBC (default for incremental) |
|
|
68
|
+
| `replace` | Drop and recreate table, then insert (default for full) |
|
|
69
|
+
| `upsert` | `INSERT ... ON CONFLICT (write_key) DO UPDATE` via temp table |
|
|
70
|
+
| `merge` | Same as upsert for PostgreSQL |
|
|
71
|
+
|
|
72
|
+
> **Note:** `upsert`/`merge` requires `write_key`. The loader writes to a temp table first, then executes a single `INSERT ... ON CONFLICT` statement to merge into the target.
|
|
73
|
+
|
|
45
74
|
---
|
|
46
75
|
|
|
47
76
|
## Write Parallelism & Throughput
|
|
@@ -78,6 +107,8 @@ Two parameters control write performance:
|
|
|
78
107
|
| `replication_method` | `full` / `incremental` | `full` | Replication strategy |
|
|
79
108
|
| `batchsize` | int | `10000` | Rows per JDBC batch insert |
|
|
80
109
|
| `write_partitions` | int | — | Coalesce DataFrame to N partitions before writing |
|
|
110
|
+
| `write_strategy` | string | — | `append`, `replace`, `upsert`, `merge` |
|
|
111
|
+
| `write_key` | list | — | Key columns for upsert/merge (required) |
|
|
81
112
|
| `dedup_columns` | list | — | Columns used for `mkpipe_id` hash deduplication |
|
|
82
113
|
| `tags` | list | `[]` | Tags for selective pipeline execution |
|
|
83
114
|
| `pass_on_error` | bool | `false` | Skip table on error instead of failing |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkpipe-loader-postgres
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: PostgreSQL loader for mkpipe.
|
|
5
5
|
Author: Metin Karakus
|
|
6
6
|
Author-email: metin_karakus@yahoo.com
|
|
@@ -64,8 +64,37 @@ pipelines:
|
|
|
64
64
|
target_name: public.stg_table
|
|
65
65
|
replication_method: full
|
|
66
66
|
batchsize: 10000
|
|
67
|
+
|
|
68
|
+
- name: source_table
|
|
69
|
+
target_name: public.stg_table
|
|
70
|
+
replication_method: incremental
|
|
71
|
+
iterate_column: updated_at
|
|
72
|
+
write_strategy: upsert
|
|
73
|
+
write_key: [id]
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Write Strategy
|
|
79
|
+
|
|
80
|
+
Control how data is written to PostgreSQL:
|
|
81
|
+
|
|
82
|
+
```yaml
|
|
83
|
+
- name: source_table
|
|
84
|
+
target_name: public.stg_table
|
|
85
|
+
write_strategy: upsert # append | replace | upsert | merge
|
|
86
|
+
write_key: [id] # required for upsert/merge
|
|
67
87
|
```
|
|
68
88
|
|
|
89
|
+
| Strategy | PostgreSQL Behavior |
|
|
90
|
+
|---|---|
|
|
91
|
+
| `append` | Plain `INSERT` via JDBC (default for incremental) |
|
|
92
|
+
| `replace` | Drop and recreate table, then insert (default for full) |
|
|
93
|
+
| `upsert` | `INSERT ... ON CONFLICT (write_key) DO UPDATE` via temp table |
|
|
94
|
+
| `merge` | Same as upsert for PostgreSQL |
|
|
95
|
+
|
|
96
|
+
> **Note:** `upsert`/`merge` requires `write_key`. The loader writes to a temp table first, then executes a single `INSERT ... ON CONFLICT` statement to merge into the target.
|
|
97
|
+
|
|
69
98
|
---
|
|
70
99
|
|
|
71
100
|
## Write Parallelism & Throughput
|
|
@@ -102,6 +131,8 @@ Two parameters control write performance:
|
|
|
102
131
|
| `replication_method` | `full` / `incremental` | `full` | Replication strategy |
|
|
103
132
|
| `batchsize` | int | `10000` | Rows per JDBC batch insert |
|
|
104
133
|
| `write_partitions` | int | — | Coalesce DataFrame to N partitions before writing |
|
|
134
|
+
| `write_strategy` | string | — | `append`, `replace`, `upsert`, `merge` |
|
|
135
|
+
| `write_key` | list | — | Key columns for upsert/merge (required) |
|
|
105
136
|
| `dedup_columns` | list | — | Columns used for `mkpipe_id` hash deduplication |
|
|
106
137
|
| `tags` | list | `[]` | Tags for selective pipeline execution |
|
|
107
138
|
| `pass_on_error` | bool | `false` | Skip table on error instead of failing |
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name='mkpipe-loader-postgres',
|
|
5
|
-
version='0.5.0',
|
|
5
|
+
version='0.6.0',
|
|
6
6
|
license='Apache License 2.0',
|
|
7
7
|
packages=find_packages(exclude=['tests', 'scripts', 'deploy', 'install_jars.py']),
|
|
8
8
|
install_requires=['mkpipe'],
|
|
File without changes
|
|
File without changes
|
{mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres/jar_paths.py
RENAMED
|
File without changes
|
{mkpipe_loader_postgres-0.5.0 → mkpipe_loader_postgres-0.6.0}/mkpipe_loader_postgres/jars/.gitkeep
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|