simple-bcp 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.3
2
+ Name: simple_bcp
3
+ Version: 0.3.0
4
+ Summary: A Simple yet powerful bcp wrapper
5
+ Author: Noam Fisher
6
+ Author-email: noam9997@gmail.com
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Programming Language :: Python :: 2
10
+ Classifier: Programming Language :: Python :: 2.7
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.4
13
+ Classifier: Programming Language :: Python :: 3.5
14
+ Classifier: Programming Language :: Python :: 3.6
15
+ Classifier: Programming Language :: Python :: 3.7
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Requires-Dist: annotated-types (>=0.7.0,<0.8.0)
23
+ Requires-Dist: packaging (>=25.0,<26.0)
24
+ Requires-Dist: pydantic (>=2.11,<3.0)
@@ -0,0 +1,38 @@
1
+ [tool.poetry]
2
+ name = "simple_bcp"
3
+ version = "0.3.0"
4
+ description = "A Simple yet powerful bcp wrapper"
5
+ authors = ["Noam Fisher <noam9997@gmail.com>"]
6
+ classifiers = [
7
+ "Programming Language :: Python :: 3",
8
+ "License :: OSI Approved :: MIT License",
9
+ "Operating System :: OS Independent",
10
+ ]
11
+
12
+ [tool.poetry.dependencies]
13
+ packaging = "~=25.0"
14
+ pydantic = "~=2.11"
15
+ annotated-types = "~=0.7.0"
16
+
17
+ [tool.semantic_release]
18
+ version_toml = ["pyproject.toml:tool.poetry.version"]
19
+ remote.type = "gitlab"
20
+ build_command = "poetry build"
21
+
22
+ [tool.semantic_release.branches.main]
23
+ match = "(main|master)"
24
+ prerelease = false
25
+
26
+ [tool.semantic_release.branches.develop]
27
+ match = "develop"
28
+ prerelease_token = "rc"
29
+ prerelease = true
30
+
31
+ [tool.semantic_release.branches.any]
32
+ match = "*"
33
+ prerelease_token = "dev"
34
+ prerelease = true
35
+
36
+ [build-system]
37
+ requires = ["poetry-core~=2.1"]
38
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,3 @@
1
+ from ._bcp_adapter import BCPAdapter, MsSqlDatabaseParameters
2
+
3
+ __all__: list[str] = ["BCPAdapter", "MsSqlDatabaseParameters"]
@@ -0,0 +1,150 @@
1
+ import enum
2
+ import logging
3
+ import os
4
+ import pathlib
5
+ import shlex
6
+ import shutil
7
+ import subprocess
8
+ import sys
9
+ from datetime import timezone, datetime
10
+ from typing import Annotated
11
+
12
+ import packaging.version
13
+ from annotated_types import Gt
14
+ from pydantic import BaseModel, StringConstraints, Field
15
+
16
+ _POSITIVE_INT = Annotated[int, Gt(0)]
17
+
18
+
19
class MsSqlDatabaseParameters(BaseModel):
    """
    Connection details of a Microsoft SQL Server instance, validated by pydantic.
    """

    # Host to connect to; surrounding whitespace is stripped and the result must not be empty.
    server_hostname: Annotated[
        str,
        StringConstraints(strip_whitespace=True, min_length=1),
    ]
    # TCP port, restricted to 1..65535; defaults to 1433.
    port: int = Field(default=1433, gt=0, lt=65536)
    # Login credentials; BCPAdapter hands them to bcp through `-U` and `-P`.
    username: str
    password: str
    # When True, BCPAdapter adds the `-u` flag to the bcp command line (Linux only).
    trust_server_certificate: bool = False
25
+
26
+
27
@enum.unique
class _Mode(enum.Enum):
    """
    Direction keyword of a bcp invocation.

    The member value is the literal token placed right after the table/query
    specification on the bcp command line.
    """

    # Keywords as spelled by the bcp utility (see the bcp documentation for their exact semantics).
    IN = "in"
    OUT = "out"
    QUERY_OUT = "queryout"
    FORMAT = "format"
32
+
33
+
34
class BCPAdapter:
    """
    Thin wrapper around the Microsoft ``bcp`` command line utility.

    Creating an instance locates the ``bcp`` executable and runs ``bcp -v`` once to record its version.
    """

    # Default of `MsSqlDatabaseParameters.port`; `-S` only receives an explicit port when the
    # requested one differs, so hostname-only (and `host\instance`) targets keep working unchanged.
    _DEFAULT_PORT = 1433
    # Placeholder substituted for the password wherever a command line is logged or attached to an exception.
    _REDACTED = "********"

    def __init__(self, *, bcp_executable_path: pathlib.Path | str | None = None, ):
        """
        :param bcp_executable_path: path of the bcp executable.
            Defaults to None which means look up ``bcp`` in the PATH environment variable.
        :raises FileNotFoundError: if bcp cannot be found
        :raises OSError: if the resolved path is not a file
        """
        self._init_logger()
        self._init_executable_path(executable_path=bcp_executable_path)
        self._init_bcp_version()

    def _init_logger(self) -> None:
        """Create the logger, named after the concrete class."""
        self._logger = logging.getLogger(self.__class__.__name__)

    def _init_executable_path(self, *, executable_path: pathlib.Path | str | None) -> None:
        """
        Resolve and validate the path of the bcp executable and store it on the instance.

        :param executable_path: explicit path, or None to search PATH for ``bcp``
        :raises FileNotFoundError: if bcp is not in PATH or the given path does not exist
        :raises OSError: if the path exists but is not a file
        """
        if executable_path is None:
            default = shutil.which("bcp")
            if default is None:
                raise FileNotFoundError(
                    "bcp not found in PATH. Add bcp to PATH environment variable or provide bcp_executable_path explicitly")
            executable_path = default

        executable_path = pathlib.Path(executable_path)

        if not executable_path.exists():
            raise FileNotFoundError(f"{executable_path.as_posix()} not found")

        if not executable_path.is_file():
            raise OSError(f"path {executable_path} is not a file")

        self._executable_path = executable_path

    def _init_bcp_version(self) -> None:
        """
        Run ``bcp -v`` and store the parsed version on the instance.

        :raises ValueError: if bcp printed nothing or the last token is not a valid version
        """
        result = self._run_bcp_command(["-v"])
        # `bcp -v` output example:
        # BCP Utility for Microsoft SQL Server
        # Copyright (C) Microsoft Corporation. All rights reserved.
        # Version 15.0.2000.5
        tokens = result.split()
        if not tokens:
            # fail with a descriptive error instead of an IndexError on the empty token list
            raise ValueError("`bcp -v` produced no output, cannot determine the bcp version")
        self._bcp_version = packaging.version.parse(tokens[-1])
        self._logger.debug(f"BCP version: {self._bcp_version}", extra={"bcp_version": str(self._bcp_version)})

    @classmethod
    def _redact_command(cls, command: list[str]) -> list[str]:
        """
        Return a copy of `command` in which the argument following every ``-P`` flag is masked.

        :param command: full command line, one list item per argument
        :return: a new list that is safe to log
        """
        redacted: list[str] = []
        mask_next = False
        for argument in command:
            if mask_next:
                redacted.append(cls._REDACTED)
                mask_next = False
            else:
                redacted.append(argument)
                mask_next = argument == "-P"
        return redacted

    def _run_bcp_command(self, command_args: list[str]) -> str:
        """
        Execute bcp with the given arguments and return what it wrote to stdout.

        The password never reaches the log records nor the raised exception: both only see the redacted command.

        :param command_args: arguments to pass after the executable path
        :return: decoded stdout of the bcp process
        :raises subprocess.CalledProcessError: if bcp exits with a non-zero status
        """
        command = [self._executable_path.as_posix()] + command_args
        loggable_command = self._redact_command(command)
        self._logger.debug(f"Running command: `{loggable_command}`",
                           extra={"bcp_command": shlex.join(loggable_command)})
        try:
            completed = subprocess.run(command, capture_output=True, check=True)
        except subprocess.CalledProcessError as error:
            # Re-raise the same exception type with the redacted command; `from None` drops the
            # original exception, whose `cmd` attribute and message contain the clear-text password.
            raise subprocess.CalledProcessError(error.returncode, loggable_command,
                                                output=error.output, stderr=error.stderr) from None
        # bcp only prints status text here; never fail a finished run because of an undecodable byte
        return completed.stdout.decode(errors="replace")

    def _resolve_output_file_path(self, *, path: pathlib.Path | str | None,
                                  default_filename: str) -> pathlib.Path:
        """
        Validate the output file path, falling back to `default_filename` in the current working directory.

        :param path: requested output path, or None to use the default
        :param default_filename: file name used when `path` is None
        :return: the path the output should be written to
        :raises FileExistsError: if the path already exists
        :raises FileNotFoundError: if the parent directory of the path does not exist
        """
        if path is None:
            path = pathlib.Path(os.getcwd()) / default_filename

        path = pathlib.Path(path)

        if path.exists():
            raise FileExistsError(
                f"{path} already exists, bcp requires path to a file that does not exist - and will create it by itself")

        directory_path = path.absolute().parent
        if not directory_path.exists():
            raise FileNotFoundError(f"directory {directory_path} does not exist")

        return path

    def _build_command_args(self, *,
                            mode: _Mode,
                            source_target_specification: str,
                            file_path: pathlib.Path,
                            database_parameters: MsSqlDatabaseParameters,
                            options: dict[str, str | None] | None = None
                            ) -> list[str]:
        """
        Build the bcp arguments (everything after the executable path).

        :param mode: bcp direction keyword
        :param source_target_specification: first positional bcp argument, e.g. a table name
        :param file_path: data file bcp reads from or writes to
        :param database_parameters: connection details about the database
        :param options: extra flags mapped to their value, None for flags that take no value.
            Defaults to None which means no extra flags.
        :return: the argument list, in the order bcp should receive it
        """
        if options is None:
            options = {}

        server = database_parameters.server_hostname
        if database_parameters.port != self._DEFAULT_PORT:
            # a non-default port must be passed explicitly, using the `server,port` form
            server = f"{server},{database_parameters.port}"

        command = [
            source_target_specification,
            mode.value,
            file_path.as_posix(),
            "-S", server,
            "-U", database_parameters.username,
            "-P", database_parameters.password,
        ]
        # NOTE(review): `-u` (trust server certificate) is only emitted on Linux - confirm whether
        # other platforms' bcp builds should receive it as well.
        if sys.platform == "linux" and database_parameters.trust_server_certificate:
            command.append("-u")

        for key, value in options.items():
            command.append(key)
            if value is not None:
                command.append(value)

        return command

    def download_table(self, *, table_name: str, database_parameters: MsSqlDatabaseParameters,
                       output_file_path: pathlib.Path | str | None = None,
                       batch_size: _POSITIVE_INT | None = None) -> pathlib.Path:
        """
        download table data using bcp

        :param table_name: the name of the table to download
        :param database_parameters: connection details about the database
        :param output_file_path: output the data to this path.
            Defaults to None which means let this package decide on the path.
            Notice: BCP requires the file to not exist, it will be created using the provided path.
        :param batch_size: Specifies the number of rows per batch of downloaded data
        :return: the path of the downloaded file
        :raises ValueError: if `batch_size` is given but is not positive
        :raises FileExistsError: if the output file already exists
        :raises FileNotFoundError: if the directory of the output file does not exist
        :raises subprocess.CalledProcessError: if bcp exits with a non-zero status
        """
        if batch_size is not None and batch_size <= 0:
            # the annotation alone is not enforced at runtime
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        timestamp = datetime.now(tz=timezone.utc).strftime("%Y%m%d%H%M%S%f")
        default_filename_prefix = "-".join(["simple_bcp", self.download_table.__name__, table_name, timestamp])
        output_file_path = self._resolve_output_file_path(path=output_file_path,
                                                          default_filename=default_filename_prefix)
        options: dict[str, str | None] = {
            "-n": None  # use native type
        }
        if batch_size is not None:
            options["-b"] = str(batch_size)
        command = self._build_command_args(mode=_Mode.OUT, source_target_specification=table_name,
                                           file_path=output_file_path, database_parameters=database_parameters,
                                           options=options)
        self._run_bcp_command(command)

        return output_file_path