mkpipe-loader-sqlserver 0.4.2__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mkpipe_loader_sqlserver-0.4.2/mkpipe_loader_sqlserver.egg-info → mkpipe_loader_sqlserver-0.5.0}/PKG-INFO +1 -1
- mkpipe_loader_sqlserver-0.5.0/README.md +94 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/mkpipe_loader_sqlserver/__init__.py +1 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0/mkpipe_loader_sqlserver.egg-info}/PKG-INFO +1 -1
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/setup.py +1 -1
- mkpipe_loader_sqlserver-0.4.2/README.md +0 -3
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/LICENSE +0 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/MANIFEST.in +0 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/mkpipe_loader_sqlserver/jars/.gitkeep +0 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/mkpipe_loader_sqlserver.egg-info/SOURCES.txt +0 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/mkpipe_loader_sqlserver.egg-info/dependency_links.txt +0 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/mkpipe_loader_sqlserver.egg-info/entry_points.txt +0 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/mkpipe_loader_sqlserver.egg-info/requires.txt +0 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/mkpipe_loader_sqlserver.egg-info/top_level.txt +0 -0
- {mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/setup.cfg +0 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# mkpipe-loader-sqlserver
|
|
2
|
+
|
|
3
|
+
SQL Server loader plugin for [MkPipe](https://github.com/mkpipe-etl/mkpipe). Writes Spark DataFrames into SQL Server tables via JDBC.
|
|
4
|
+
|
|
5
|
+
## Documentation
|
|
6
|
+
|
|
7
|
+
For more detailed documentation, please visit the [GitHub repository](https://github.com/mkpipe-etl/mkpipe).
|
|
8
|
+
|
|
9
|
+
## License
|
|
10
|
+
|
|
11
|
+
This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Connection Configuration
|
|
16
|
+
|
|
17
|
+
```yaml
|
|
18
|
+
connections:
|
|
19
|
+
sqlserver_target:
|
|
20
|
+
variant: sqlserver
|
|
21
|
+
host: localhost
|
|
22
|
+
port: 1433
|
|
23
|
+
database: mydb
|
|
24
|
+
user: myuser
|
|
25
|
+
password: mypassword
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Table Configuration
|
|
31
|
+
|
|
32
|
+
```yaml
|
|
33
|
+
pipelines:
|
|
34
|
+
- name: pg_to_sqlserver
|
|
35
|
+
source: pg_source
|
|
36
|
+
destination: sqlserver_target
|
|
37
|
+
tables:
|
|
38
|
+
- name: public.events
|
|
39
|
+
target_name: dbo.stg_events
|
|
40
|
+
replication_method: full
|
|
41
|
+
batchsize: 10000
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Write Strategy
|
|
47
|
+
|
|
48
|
+
Control how data is written to SQL Server:
|
|
49
|
+
|
|
50
|
+
```yaml
|
|
51
|
+
- name: public.events
|
|
52
|
+
target_name: dbo.stg_events
|
|
53
|
+
write_strategy: upsert # append | replace | upsert | merge
|
|
54
|
+
write_key: [id] # required for upsert/merge
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
| Strategy | SQL Server Behavior |
|
|
58
|
+
|---|---|
|
|
59
|
+
| `append` | Plain `INSERT` via JDBC (default for incremental) |
|
|
60
|
+
| `replace` | Drop and recreate table, then insert (default for full) |
|
|
61
|
+
| `upsert` | `MERGE target USING temp ON ... WHEN MATCHED THEN UPDATE ... WHEN NOT MATCHED THEN INSERT ...;` |
|
|
62
|
+
| `merge` | Same as upsert for SQL Server |
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Write Parallelism & Throughput
|
|
67
|
+
|
|
68
|
+
```yaml
|
|
69
|
+
- name: public.events
|
|
70
|
+
target_name: dbo.stg_events
|
|
71
|
+
replication_method: full
|
|
72
|
+
batchsize: 10000
|
|
73
|
+
write_partitions: 4
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
- **`batchsize`**: rows per JDBC batch insert. Values between 5,000 and 20,000 rows generally work well for SQL Server.
|
|
77
|
+
- **`write_partitions`**: coalesces the DataFrame to N partitions via `coalesce(N)` before writing, which reduces the number of concurrent JDBC connections.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## All Table Parameters
|
|
82
|
+
|
|
83
|
+
| Parameter | Type | Default | Description |
|
|
84
|
+
|---|---|---|---|
|
|
85
|
+
| `name` | string | required | Source table name |
|
|
86
|
+
| `target_name` | string | required | SQL Server destination table name (include schema) |
|
|
87
|
+
| `replication_method` | `full` / `incremental` | `full` | Replication strategy |
|
|
88
|
+
| `batchsize` | int | `10000` | Rows per JDBC batch insert |
|
|
89
|
+
| `write_partitions` | int | — | Coalesce DataFrame to N partitions before writing |
|
|
90
|
+
| `write_strategy` | string | — | One of `append`, `replace`, `upsert`, `merge`; when unset, defaults to `replace` for `full` replication and `append` for `incremental` |
|
|
91
|
+
| `write_key` | list | — | Key columns for `upsert`/`merge`; required when `write_strategy` is `upsert` or `merge` |
|
|
92
|
+
| `dedup_columns` | list | — | Columns used for `mkpipe_id` hash deduplication |
|
|
93
|
+
| `tags` | list | `[]` | Tags for selective pipeline execution |
|
|
94
|
+
| `pass_on_error` | bool | `false` | Skip table on error instead of failing |
|
{mkpipe_loader_sqlserver-0.4.2 → mkpipe_loader_sqlserver-0.5.0}/mkpipe_loader_sqlserver/__init__.py
RENAMED
|
@@ -8,6 +8,7 @@ JAR_PACKAGES = ['com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre11']
|
|
|
8
8
|
class SqlserverLoader(JdbcLoader, variant='sqlserver'):
|
|
9
9
|
driver_name = 'sqlserver'
|
|
10
10
|
driver_jdbc = 'com.microsoft.sqlserver.jdbc.SQLServerDriver'
|
|
11
|
+
_dialect = 'sqlserver'
|
|
11
12
|
|
|
12
13
|
def build_jdbc_url(self):
|
|
13
14
|
password = unquote(self.password)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|