perspective-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
1
+ """Playground for Qlik Sense APIs exploration.
2
+
3
+ We connect to two APIs:
4
+ - Qlik Sense Repository Service (QRS) API
5
+ - Qlik Engine JSON API
6
+
7
+ We connect using the default Virtual Proxy Service (built into Qlik Sense Enterprise
8
+ default configuration, so no server-side configuration is required).
9
+
10
+ For auth, for the time being we use the Windows login method (NTLM), combined with the
11
+ "header" method of authenticating to these APIs.
12
+
13
+ In the case of the QRS API, we use the requests library to make HTTP requests, and use
14
+ the requests_ntlm library to handle NTLM authentication.
15
+
16
+ For the Qlik Engine JSON API, we use the websocket library to create a WebSocket
17
+ connection. To authenticate, we use a Qlik session ID that we get from calling a
18
+ QRS API endpoint (the endpoint doesn't matter, we only care that the server returns
19
+ a session ID in its response) using the method described above.
20
+ """
21
+
22
+ from collections.abc import Generator
23
+ import json
24
+ from pathlib import Path
25
+ import secrets
26
+ import ssl
27
+ import string
28
+ from typing import Any
29
+
30
+ import dlt
31
+ from dlt.extract.resource import DltResource
32
+ from loguru import logger
33
+ import requests
34
+ from requests_ntlm import HttpNtlmAuth
35
+ import websocket
36
+
37
+
38
+ # # Disable SSL warnings (optional but recommended for self-signed certs)
39
+ requests.packages.urllib3.disable_warnings()
40
+
41
+
42
def get_qrs_session(ntlm_username: str, ntlm_password: str) -> requests.Session:
    """Create and configure a requests Session for QRS API.

    Args:
        ntlm_username (str): Windows (NTLM) username.
        ntlm_password (str): Windows (NTLM) password.

    Returns:
        requests.Session: A session pre-configured with NTLM auth, the Qlik
            anti-CSRF "xrfkey" header/param pair, and TLS verification
            disabled (self-signed certificates are common in Qlik Sense
            deployments).
    """
    session = requests.Session()
    # Qlik requires the same random token in both the "x-qlik-xrfkey" header
    # and the "xrfkey" query parameter (CSRF protection); it must be exactly
    # 16 alphanumeric characters. ("xrf" is probably a typo of "xsrf".)
    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
    xrfkey = "".join(secrets.choice(alphabet) for _ in range(16))
    session.auth = HttpNtlmAuth(ntlm_username, ntlm_password)
    # BUG FIX: update rather than replace, so requests' default headers
    # (Accept, Accept-Encoding, Connection) are preserved.
    session.headers.update(
        {
            "x-qlik-xrfkey": xrfkey,
            # The virtual proxy inspects the User-Agent to select the auth
            # flow; "Windows" triggers NTLM — TODO confirm against QS docs.
            "User-Agent": "Windows",
        }
    )
    session.params = {"xrfkey": xrfkey}
    # NOTE: TLS verification disabled on purpose (self-signed certs).
    session.verify = False
    return session
57
+
58
+
59
def get_qlik_session_id(
    qrs_api_base_url: str, ntlm_username: str, ntlm_password: str
) -> str:
    """Obtain a Qlik session ID by calling an arbitrary QRS API endpoint.

    The endpoint itself doesn't matter; we only need the server to set the
    "X-Qlik-Session" cookie in its response.

    Raises:
        ValueError: If session ID could not be retrieved.

    Returns:
        str: The Qlik session ID.
    """
    qrs_session = get_qrs_session(
        ntlm_username=ntlm_username, ntlm_password=ntlm_password
    )

    logger.debug("Retrieving Qlik session ID...")

    response = qrs_session.get(f"{qrs_api_base_url}/about")
    response.raise_for_status()

    session_id = qrs_session.cookies.get("X-Qlik-Session")
    if session_id:
        logger.debug(f"Retrieved Qlik session ID: {session_id}.")
        return session_id

    msg = "Could not retrieve Qlik session ID. Perhaps Qlik API's logic changed?"
    raise ValueError(msg)
85
+
86
+
87
def get_socket(engine_api_url: str, session_id: str) -> websocket.WebSocket:
    """Create a WebSocket connection to the Qlik Engine JSON API.

    Args:
        engine_api_url (str): The URL of the Qlik Engine JSON API.
        session_id (str): The ID of the Qlik session to use.

    Raises:
        ValueError: If the Engine API reports we are not authenticated.

    Returns:
        websocket.WebSocket: An authenticated socket connection.
    """
    # Reuse the QRS-issued session cookie so that we can (indirectly) use
    # header auth in the Qlik Engine JSON API.
    headers = {"Cookie": f"X-Qlik-Session={session_id}"}

    # Connect to the Qlik Engine JSON API.
    logger.info("Creating a socket...")

    # Skip certificate verification: self-signed certs are the norm here.
    socket = websocket.WebSocket(sslopt={"cert_reqs": ssl.CERT_NONE})
    socket.connect(engine_api_url, header=headers)
    try:
        on_authentication_msg = json.loads(socket.recv())
        # BUG FIX: loguru does not printf-interpolate extra positional args
        # without a "{}" placeholder — use an f-string so the payload is
        # actually logged.
        logger.debug(f"On authentication message: {on_authentication_msg}")
        if on_authentication_msg["params"]["mustAuthenticate"]:
            msg = "Could not authenticate to Qlik Engine JSON API."
            msg += " Please ensure you're authenticated and provide a valid session ID."
            raise ValueError(msg)

        on_connected_msg = socket.recv()
        logger.debug(f"On connected message: {on_connected_msg}")
    except Exception:
        # BUG FIX: don't leak the connection when the handshake fails.
        socket.close()
        raise

    return socket
117
+
118
+
119
def get(
    request: dict[str, Any],
    engine_api_url: str,
    session_id: str | None = None,
    socket: websocket.WebSocket | None = None,
) -> dict[str, Any]:
    """Retrieve a response from the Qlik Engine JSON API using WebSocket.

    Args:
        request (dict[str, Any]): The request body (JSON-RPC; must contain
            at least a "method" key).
        engine_api_url (str): The URL of the Qlik Engine JSON API.
        session_id (str | None, optional): The ID of the Qlik session to use.
            Defaults to None.
        socket (websocket.WebSocket | None, optional): An externally-managed
            socket to use. When provided, the caller owns it (we never close
            it). Defaults to None.

    Raises:
        ValueError: If neither a session ID nor a socket is provided, or if
            the API returns an error response.

    Returns:
        dict[str, Any]: The "result" field of the Engine API response.
    """
    if not session_id and not socket:
        msg = "Either `socket` or `session_id` must be provided."
        raise ValueError(msg)

    # If the caller gave us a socket we must not close it; otherwise we open
    # a throwaway connection for this single call.
    is_socket_externally_managed = bool(socket)
    if not is_socket_externally_managed:
        socket = get_socket(engine_api_url, session_id=session_id)

    logger.info(f"Calling '{request['method']}' method...")
    try:
        socket.send(json.dumps(request))
        response = json.loads(socket.recv())
    finally:
        # BUG FIX: previously an internally-managed socket leaked when
        # send/recv raised; close it on both success and failure.
        if not is_socket_externally_managed:
            socket.close()

    if machine_readable_error := response.get("error"):
        # Turn the JSON-RPC error object into a readable exception message.
        error_code = machine_readable_error["code"]
        error_message_short = machine_readable_error["message"]
        error_message_long = machine_readable_error["parameter"]
        human_readable_error = (
            f"Error {error_code} ('{error_message_short}'). " + error_message_long + "."
        )
        raise ValueError(human_readable_error)

    return response["result"]
167
+
168
+
169
@dlt.source
def qlik_sense(
    qrs_api_base_url: str = dlt.secrets.value,
    engine_api_url: str = dlt.secrets.value,
    ntlm_username: str = dlt.secrets.value,
    ntlm_password: str = dlt.secrets.value,
) -> list[DltResource]:
    """The Qlik Sense metadata source.

    Args:
        qrs_api_base_url (str): Base URL of the QRS API.
        engine_api_url (str): URL of the Qlik Engine JSON API.
        ntlm_username (str): Windows (NTLM) username.
        ntlm_password (str): Windows (NTLM) password.

    Returns:
        list[DltResource]: The app list resource plus two per-app detail
            transformers (QRS and Engine).
    """
    # We use a single Qlik session for all requests.
    qlik_session_id = get_qlik_session_id(
        qrs_api_base_url, ntlm_username=ntlm_username, ntlm_password=ntlm_password
    )

    @dlt.resource(primary_key="qDocId", write_disposition="merge")
    def apps_engine(
        modified_at: dlt.sources.incremental = dlt.sources.incremental(  # noqa: B008
            "modifiedDate", initial_value="2024-01-01T00:00:00Z"
        ),
    ) -> DltResource:
        """Get metadata about all apps in Qlik Sense from the Engine JSON API."""
        query = {
            "jsonrpc": "2.0",
            "method": "GetDocList",
            "handle": -1,
            "params": [],
        }
        apps_info_nested = get(
            query, session_id=qlik_session_id, engine_api_url=engine_api_url
        )
        # Unnest and return only the modified apps.
        for app in apps_info_nested["qDocList"]:
            meta = app["qMeta"]
            if meta["modifiedDate"] <= modified_at.start_value:
                continue
            # An app may not belong to any stream (e.g. unpublished apps).
            stream = meta.get("stream") or {}
            yield {
                "qDocId": app["qDocId"],
                "qDocName": app["qDocName"],
                "description": meta["description"],
                "createdDate": meta["createdDate"],
                "modifiedDate": meta["modifiedDate"],
                "stream": {"id": stream.get("id"), "name": stream.get("name")},
            }

    @dlt.transformer(data_from=apps_engine, primary_key="id", write_disposition="merge")
    def app_details_qrs(app: dict[str, Any]) -> Generator[dict[str, Any], None, None]:
        """Get per-app details from the QRS API (`/app/{id}`)."""
        app_id = app["qDocId"]

        logger.debug(f"Retrieving app {app_id} details from QRS API...")

        session = get_qrs_session(
            ntlm_username=ntlm_username, ntlm_password=ntlm_password
        )
        full_url = qrs_api_base_url + f"/app/{app_id}"
        response = session.get(full_url)
        response.raise_for_status()
        yield response.json()

    @dlt.transformer(data_from=apps_engine, primary_key="id", write_disposition="merge")
    def app_details_engine(
        app: dict[str, Any],
    ) -> Generator[dict[str, Any], None, None]:
        """Get per-app lineage from the Engine JSON API (OpenDoc + GetLineage)."""
        app_id = app["qDocId"]
        # These two requests are connected (OpenDoc produces the handle that
        # GetLineage uses), so we need to perform them on the same socket.
        socket = get_socket(engine_api_url, session_id=qlik_session_id)
        open_app_query = {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "OpenDoc",
            "handle": -1,
            "params": {"qDocName": app_id},
        }
        lineage_query = {
            "jsonrpc": "2.0",
            "id": 2,
            "method": "GetLineage",
            "handle": 1,
            "params": {},
        }

        logger.debug(f"Retrieving app {app_id} details from Qlik Engine JSON API...")

        try:
            _ = get(open_app_query, socket=socket, engine_api_url=engine_api_url)
            lineage = get(lineage_query, socket=socket, engine_api_url=engine_api_url)[
                "qLineage"
            ]
        except Exception:
            # BUG FIX: `logger.exception(e, msg=msg)` never rendered `msg`
            # (loguru takes the message as the first argument and appends the
            # traceback itself), and `lineage` was left unbound, crashing with
            # a NameError on the yield below. Log and skip this app instead.
            msg = f"Failed retrieving lineage for app: {app['qDocName']} (ID: {app_id})."
            logger.exception(msg)
            return
        finally:
            socket.close()

        # NOTE: We enrich the response with the app ID so that it can be handled
        # properly.
        yield {"id": app_id, "lineage": lineage}

    return [apps_engine, app_details_qrs, app_details_engine]
277
+
278
+
279
+ if __name__ == "__main__":
280
+ # source = qlik_sense().add_limit(2)
281
+ from loguru import logger
282
+
283
+ source = qlik_sense()
284
+ # for apps_engine, app_details_qrs, app_details_engine in source:
285
+ # print(json.dumps(app_details_qrs, indent=4))
286
+ # print("-" * 30)
287
+ # print(json.dumps(app_details_engine, indent=4))
288
+ app_details_qrs = list(source.with_resources("app_details_qrs"))
289
+ app_details_engine = list(source.with_resources("app_details_engine"))
290
+ logger.info(f"{len(app_details_qrs)} apps in QRS API.")
291
+ logger.info(f"{len(app_details_engine)} apps in Engine JSON API.")
292
+
293
+ with Path("app_details_qrs.json").open("w", encoding="utf-8") as f:
294
+ json.dump(app_details_qrs, f, indent=4)
295
+
296
+ with Path("app_details_engine.json").open("w", encoding="utf-8") as f:
297
+ json.dump(app_details_engine, f, indent=4)
@@ -0,0 +1,22 @@
1
+ """Pydantic models for Qlik Sense APIs responses."""
2
+
3
+ from pydantic import BaseModel, EmailStr
4
+
5
+
6
class AppDetailsQRS(BaseModel):
    """App metadata as returned by the QRS API."""

    # Qlik app ID — presumably a GUID; TODO confirm.
    id: str
    name: str
    description: str | None
    # Owner e-mail; None when unavailable.
    owner: EmailStr | None
    # Timestamps kept as raw strings — assumed ISO-8601; TODO confirm format.
    created_at: str
    updated_at: str
13
+
14
+
15
class Table(BaseModel):
    """A single lineage entry from the Engine API's GetLineage response."""

    # NOTE(review): field names mirror the Engine API's "q"-prefixed keys.
    qDiscriminator: str
    # Load statement, when present.
    qStatement: str | None = None
18
+
19
+
20
class AppDetailsEngine(BaseModel):
    """Per-app lineage details assembled from the Engine JSON API."""

    app_id: str
    # One entry per lineage source reported by GetLineage.
    lineage: list[Table]
@@ -0,0 +1,19 @@
1
+ """dlt pipeline to load Qlik Sense metadata into DuckDB."""
2
+
3
+ import dlt
4
+
5
+ from perspective.ingest.sources.bi.qlik_sense.extract import qlik_sense
6
+
7
+
8
# Pipeline writing Qlik Sense metadata into a local DuckDB file.
pipe = dlt.pipeline(
    pipeline_name="qlik_to_duckdb",
    # Local DuckDB database file in the current working directory.
    destination=dlt.destinations.duckdb("db.duckdb"),
    dataset_name="qlik_sense",
)

if __name__ == "__main__":
    from loguru import logger

    # load_package = pipe.run(qlik().add_limit(5), refresh="drop_data") # For testing.
    # Limit each resource to 5 items to keep exploratory runs fast.
    load_package = pipe.run(qlik_sense().add_limit(5))
    logger.info(load_package)
@@ -0,0 +1,76 @@
1
+ """Transform Qlik Sense metadata into Perspective DashboardManifest format."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ from perspective.ingest.sources.bi.qlik_sense.models import (
7
+ AppDetailsEngine,
8
+ AppDetailsQRS,
9
+ )
10
+ from perspective.models.dashboards import (
11
+ Dashboard,
12
+ DashboardManifest,
13
+ DashboardSchemaMetadata,
14
+ DataModel,
15
+ )
16
+
17
+
18
def transform(
    app_details_qrs: list[AppDetailsQRS], app_details_engine: list[AppDetailsEngine]
) -> DashboardManifest:
    """Join app metadata from the two sources: QRS API and Engine JSON API.

    BUG FIX: this docstring was previously preceded by commented-out code, so
    Python treated it as a no-op string expression rather than the function's
    docstring; the dead code was removed and the docstring moved to the top.
    Annotations were also aligned with the docstring (lists of records).

    Args:
        app_details_qrs (list[AppDetailsQRS]): List of app details from QRS API.
        app_details_engine (list[AppDetailsEngine]): List of app details from Engine
            JSON API.

    Returns:
        DashboardManifest: A manifest whose payload is one entry per app.
    """
    apps = []
    for app in app_details_qrs:
        # NOTE(review): records are accessed as dicts (JSON-loaded QRS
        # responses), not as parsed pydantic models — TODO align the type
        # annotations with actual usage.
        owner = app.get("owner", {})
        apps.append(
            {
                "external_id": app["id"],
                "url": f"{app['resourceId']}/hub/{app['id']}",
                "type": "qliksense",
                "name": app["name"],
                # Apps without a stream live in the user's "My Work" area.
                "workspace": app.get("stream", {}).get("name", "My Work"),
                "created_at": app.get("createdDate"),
                "modified_at": app.get("modifiedDate"),
                "owners": [
                    {
                        "user_id": owner.get("userId", ""),
                        "username": owner.get("userDirectory", ""),
                        "name": owner.get("name", ""),
                    }
                ],
                "parent_models": [],
            }
        )

    # TODO: `app_details_engine` (lineage) is not joined into the payload yet.
    return DashboardManifest(
        metadata=DashboardSchemaMetadata(schema="dashboard", version=1),
        payload=apps,
    )
62
+
63
+
64
+ if __name__ == "__main__":
65
+ from loguru import logger
66
+
67
+ with Path("app_details_qrs.json").open(encoding="utf-8") as f:
68
+ app_details_qrs = json.load(f)
69
+
70
+ with Path("app_details_engine.json").open(encoding="utf-8") as f:
71
+ app_details_engine = json.load(f)
72
+
73
+ dashboard_manifest = transform(
74
+ app_details_qrs=app_details_qrs, app_details_engine=app_details_engine
75
+ )
76
+ logger.info(dashboard_manifest)
@@ -0,0 +1,253 @@
1
# Standard library first, then third-party — previously the stdlib imports
# sat below the pyrfc guard, violating PEP 8 import grouping. Reordering
# stdlib imports has no behavioral effect; no import was removed.
from itertools import batched
import string
import textwrap

import dlt
from loguru import logger

try:
    import pyrfc
    from pyrfc._exception import (
        ABAPApplicationError,
        ABAPRuntimeError,
        CommunicationError,
    )
except ModuleNotFoundError as e:
    msg = "The 'pyrfc' package is required to use the SAPRFC source. "
    raise ImportError(msg) from e
19
+
20
+
21
+ @dlt.source(name="sap")
22
+ def sap(
23
+ ashost: str = dlt.secrets.value,
24
+ sysnr: str = dlt.secrets.value,
25
+ username: str = dlt.secrets.value,
26
+ passwd: str = dlt.secrets.value,
27
+ ):
28
+ """Query SAP with SQL using the RFC protocol.
29
+
30
+ Use the RFC_READ_TABLE to read SAP table metadata.
31
+ """
32
+ delimiter = "\t"
33
+
34
+ # Test the connection.
35
+ con = pyrfc.Connection(
36
+ ashost=ashost,
37
+ sysnr=sysnr,
38
+ user=username,
39
+ passwd=passwd,
40
+ )
41
+ logger.info("Checking the connection...")
42
+ try:
43
+ con.ping()
44
+ logger.info("Connection successful.")
45
+ except Exception as e:
46
+ logger.exception("Connection to SAP failed.")
47
+ raise
48
+ finally:
49
+ con.close()
50
+
51
+ # By convention, custom tables use a "Z" or "Y" prefix in their name.
52
+ custom_table_prefixes = ["Z", "Y"]
53
+ standard_table_prefixes = [
54
+ char for char in string.ascii_uppercase if char not in custom_table_prefixes
55
+ ]
56
+
57
+ def get_response(
58
+ params,
59
+ func: str = "RFC_READ_TABLE",
60
+ data_key: str = "DATA",
61
+ fallback_data_key: str | None = None,
62
+ record_key: str = "WA",
63
+ fallback_record_key: str | None = None,
64
+ ):
65
+ """Call the RFC function with the given parameters."""
66
+ con = pyrfc.Connection(
67
+ ashost=ashost,
68
+ sysnr=sysnr,
69
+ user=username,
70
+ passwd=passwd,
71
+ )
72
+ try:
73
+ response = con.call(func, **params)
74
+ except (ABAPApplicationError, ABAPRuntimeError, CommunicationError) as e:
75
+ if e.key == "DATA_BUFFER_EXCEEDED":
76
+ msg = "Character limit per row exceeded. Please select fewer columns."
77
+ elif e.key == "TSV_TNEW_PAGE_ALLOC_FAILED":
78
+ msg = "Memory allocation failed; try a smaller batch size."
79
+ else:
80
+ msg = f"Error while calling {func} with params:\n{params}."
81
+ logger.exception(msg)
82
+ raise
83
+ finally:
84
+ con.close()
85
+
86
+ # Process the response into records.
87
+ data_raw = response.get(data_key) or response.get(fallback_data_key)
88
+ data = [
89
+ [
90
+ value.strip()
91
+ for value in row.get(record_key, row.get(fallback_record_key)).split(
92
+ delimiter
93
+ )
94
+ ]
95
+ for row in data_raw
96
+ ]
97
+ logger.info(f"Retrieved {len(data)} rows.")
98
+ columns = params.get("FIELDS")
99
+ if columns:
100
+ return [dict(zip(columns, row)) for row in data]
101
+ return data
102
+
103
+ def get_table_schema(table_prefixes: list[str]):
104
+ conditions = [f"TABNAME LIKE '{prefix}%'" for prefix in table_prefixes]
105
+ # Each line in OPTIONS must be ≤72 characters.
106
+ grouped = textwrap.wrap(" OR ".join(conditions), width=72)
107
+ options = [{"TEXT": line} for line in grouped]
108
+ # Filter out internal SAP objects.
109
+ options += [{"TEXT": " AND TABCLASS = 'TRANSP'"}]
110
+ params = {
111
+ "QUERY_TABLE": "DD02L",
112
+ "FIELDS": ["TABNAME"],
113
+ "OPTIONS": options,
114
+ "DELIMITER": delimiter,
115
+ }
116
+ yield get_response(params)
117
+
118
+ @dlt.resource(
119
+ name="standard_tables", write_disposition="merge", primary_key="TABNAME"
120
+ )
121
+ def standard_tables():
122
+ """Get a list of standard SAP tables."""
123
+ yield get_table_schema(standard_table_prefixes)
124
+
125
+ @dlt.resource(write_disposition="merge", primary_key="TABNAME")
126
+ def custom_tables():
127
+ """Get a list of custom SAP tables."""
128
+ yield get_table_schema(custom_table_prefixes)
129
+
130
+ def get_table_details(table_names):
131
+ """Get metadata about SAP tables."""
132
+ table_batches = batched(table_names, 1000)
133
+ for batch_number, batch in enumerate(table_batches, start=1):
134
+ conditions = [f"TABNAME = '{table}'" for table in batch]
135
+ # Each line in OPTIONS must be ≤72 characters.
136
+ grouped = textwrap.wrap(" OR ".join(conditions), width=72)
137
+ options = [{"TEXT": line} for line in grouped]
138
+ params = {
139
+ "QUERY_TABLE": "DD03L",
140
+ "FIELDS": ["TABNAME", "FIELDNAME", "DATATYPE", "LENG"],
141
+ "OPTIONS": options,
142
+ "DELIMITER": delimiter,
143
+ }
144
+ logger.info(f"Extracting table batch number {batch_number}...")
145
+ yield get_response(params)
146
+
147
+ @dlt.transformer(
148
+ data_from=standard_tables,
149
+ write_disposition="merge",
150
+ primary_key=("TABNAME", "FIELDNAME"),
151
+ )
152
+ def standard_tables_details(standard_tables):
153
+ standard_table_names = [row["TABNAME"] for row in standard_tables]
154
+ yield get_table_details(standard_table_names)
155
+
156
+ @dlt.transformer(
157
+ data_from=custom_tables,
158
+ write_disposition="merge",
159
+ primary_key=("TABNAME", "FIELDNAME"),
160
+ )
161
+ def custom_tables_details(custom_tables):
162
+ custom_table_names = [row["TABNAME"] for row in custom_tables]
163
+ yield get_table_details(custom_table_names)
164
+
165
+ @dlt.resource(write_disposition="merge", primary_key="ROLLNAME")
166
+ def column_details():
167
+ """Get metadata about table columns in SAP tables."""
168
+ params = {
169
+ "QUERY_TABLE": "DD04T",
170
+ "FIELDS": ["ROLLNAME", "DDTEXT"],
171
+ "OPTIONS": [{"TEXT": "DDLANGUAGE = 'EN'"}],
172
+ "DELIMITER": delimiter,
173
+ }
174
+ logger.info("Extracting table columns...")
175
+ yield get_response(params)
176
+
177
+ @dlt.resource(write_disposition="replace")
178
+ def abap_programs():
179
+ """List all ABAP programs (reports, includes, module pools, etc.)."""
180
+ # Iterate 5 letters at a time as SAP can't handle large queries,
181
+ # and it seems there can be millions of ABAP programs.
182
+ logger.info("Extracting ABAP programs...")
183
+ for prefixes_group in batched(string.ascii_uppercase, 5):
184
+ logger.info(
185
+ f"Extracting programs with the following prefixes: {prefixes_group}..."
186
+ )
187
+ conditions = [f"NAME LIKE '{prefix}%'" for prefix in prefixes_group]
188
+
189
+ # Each line in OPTIONS must be ≤72 characters.
190
+ grouped = textwrap.wrap(" OR ".join(conditions), width=72)
191
+ options = [{"TEXT": line} for line in grouped]
192
+ # '1' = Executable report
193
+ # 'I' = Include
194
+ # 'M' = Module pool (dialog program)
195
+ # 'F' = Function group main program (needed for function modules)
196
+ # 'K' = Class pool (needed for global classes)
197
+ # 'J' = Interface pool
198
+ # 'S' = Subroutine pool
199
+ options += [{"TEXT": " AND SUBC IN ('1','I','M','F','K','J','S')"}]
200
+ # NOTE: all these filters only remove ~5% of records.
201
+ params = {
202
+ "QUERY_TABLE": "TRDIR",
203
+ "FIELDS": ["NAME"],
204
+ "OPTIONS": options,
205
+ "DELIMITER": delimiter,
206
+ }
207
+ yield get_response(params)
208
+
209
+ @dlt.transformer(
210
+ data_from=abap_programs, write_disposition="merge", primary_key="name"
211
+ )
212
+ def abap_programs_source_code(abap_programs):
213
+ logger.info("Extracting the source code of ABAP programs...")
214
+ program_names = (row["NAME"] for row in abap_programs)
215
+ for program in program_names:
216
+ params = {"PROGRAM_NAME": program}
217
+ logger.info(f"Extracting source for ABAP program: {program}...")
218
+ source_code_lines = get_response(
219
+ params,
220
+ func="RPY_PROGRAM_READ",
221
+ data_key="SOURCE",
222
+ fallback_data_key="SOURCE_EXTENDED",
223
+ fallback_record_key="LINE",
224
+ )
225
+ yield {
226
+ "name": program,
227
+ "source": "\n".join([line[0] for line in source_code_lines]),
228
+ }
229
+
230
+ @dlt.resource(write_disposition="replace")
231
+ def abap_transactions():
232
+ pass
233
+
234
+ return [
235
+ standard_tables,
236
+ standard_tables_details,
237
+ custom_tables,
238
+ custom_tables_details,
239
+ column_details,
240
+ abap_programs,
241
+ abap_programs_source_code,
242
+ ]
243
+
244
+
245
+ if __name__ == "__main__":
246
+ pipeline = dlt.pipeline(
247
+ pipeline_name="sap", destination="duckdb", dataset_name="bronze"
248
+ )
249
+ # pipeline.run(sap.with_resources("column_details", "custom_tables_details"))
250
+ # pipeline.run(sap().with_resources("column_details", "standard_tables_details"))
251
+ pipeline.run(sap().with_resources("abap_programs_source_code"))
252
+ # pipeline.run(sap().add_limit(1).with_resources("abap_programs"))
253
+ # pipeline.run(sap().with_resources("abap_programs"))
@@ -0,0 +1,23 @@
1
+ from typing import Generator
2
+
3
+ from loguru import logger
4
+
5
+ from lumaCLI.metadata.models.database import DatabaseTableManifest
6
+ from lumaCLI.metadata.sources.sap.extract import sap
7
+ from lumaCLI.metadata.sources.sap.transform import transform
8
+
9
+
10
def pipeline() -> Generator[DatabaseTableManifest, None, None]:
    """Pipeline to extract SAP metadata and transform it into table manifests.

    Yields:
        DatabaseTableManifest: One manifest per transformed batch.
    """
    # NOTE(review): `source` is created but never used, and `transform()` is
    # called without arguments — presumably transform should consume the
    # extracted `source` data; TODO confirm transform's signature and wire
    # them together.
    source = sap().with_resources("column_details", "custom_tables_details")
    manifest_batches = transform()
    yield from manifest_batches
15
+
16
+
17
+ if __name__ == "__main__":
18
+ manifest_batches = pipeline()
19
+ for i, manifest in enumerate(manifest_batches):
20
+ logger.info(f"Writing {len(manifest.payload)} tables to batch {i} manifest...")
21
+
22
+ with open(f"database_table_manifest__batch_{i}.json", "w") as f:
23
+ f.write(manifest.json())