pbixray 0.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pbixray-0.1.12/MANIFEST.in +3 -0
- pbixray-0.1.12/PKG-INFO +106 -0
- pbixray-0.1.12/README.md +78 -0
- pbixray-0.1.12/pbixray/__init__.py +1 -0
- pbixray-0.1.12/pbixray/abf/__init__.py +0 -0
- pbixray-0.1.12/pbixray/abf/backup_log.py +48 -0
- pbixray-0.1.12/pbixray/abf/backup_log_header.py +24 -0
- pbixray-0.1.12/pbixray/abf/data_model.py +7 -0
- pbixray-0.1.12/pbixray/abf/parser.py +63 -0
- pbixray-0.1.12/pbixray/abf/virtual_directory.py +20 -0
- pbixray-0.1.12/pbixray/column_data/__init__.py +0 -0
- pbixray-0.1.12/pbixray/column_data/dictionary.py +246 -0
- pbixray-0.1.12/pbixray/column_data/hidx.py +97 -0
- pbixray-0.1.12/pbixray/column_data/idf.py +55 -0
- pbixray-0.1.12/pbixray/column_data/idfmeta.py +183 -0
- pbixray-0.1.12/pbixray/core.py +52 -0
- pbixray-0.1.12/pbixray/huffman.py +79 -0
- pbixray-0.1.12/pbixray/lib/libxpress9.dll +0 -0
- pbixray-0.1.12/pbixray/lib/libxpress9.dylib +0 -0
- pbixray-0.1.12/pbixray/lib/libxpress9.so +0 -0
- pbixray-0.1.12/pbixray/meta/__init__.py +0 -0
- pbixray-0.1.12/pbixray/meta/metadata_handler.py +57 -0
- pbixray-0.1.12/pbixray/meta/metadata_query.py +85 -0
- pbixray-0.1.12/pbixray/meta/sqlite_handler.py +30 -0
- pbixray-0.1.12/pbixray/pbix_unpacker.py +99 -0
- pbixray-0.1.12/pbixray/utils.py +24 -0
- pbixray-0.1.12/pbixray/vertipaq_decoder.py +190 -0
- pbixray-0.1.12/pbixray.egg-info/PKG-INFO +106 -0
- pbixray-0.1.12/pbixray.egg-info/SOURCES.txt +33 -0
- pbixray-0.1.12/pbixray.egg-info/dependency_links.txt +1 -0
- pbixray-0.1.12/pbixray.egg-info/requires.txt +3 -0
- pbixray-0.1.12/pbixray.egg-info/top_level.txt +1 -0
- pbixray-0.1.12/setup.cfg +4 -0
- pbixray-0.1.12/setup.py +40 -0
- pbixray-0.1.12/test/test_basic_operation.py +27 -0
pbixray-0.1.12/PKG-INFO
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pbixray
|
|
3
|
+
Version: 0.1.12
|
|
4
|
+
Summary: A Python library to parse and analyze PBIX files used with Microsoft Power BI.
|
|
5
|
+
Home-page: https://github.com/Hugoberry/pbixray
|
|
6
|
+
Author: Igor Cotruta
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Intended Audience :: Information Technology
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.6
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Operating System :: OS Independent
|
|
23
|
+
Requires-Python: >=3.6
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
Requires-Dist: kaitaistruct
|
|
26
|
+
Requires-Dist: pandas
|
|
27
|
+
Requires-Dist: apsw
|
|
28
|
+
|
|
29
|
+
# PBIXRay
|
|
30
|
+
|
|
31
|
+
## Overview
|
|
32
|
+
|
|
33
|
+
PBIXRay is a Python library designed to parse and analyze PBIX files, which are used with Microsoft Power BI. This library provides a straightforward way to extract valuable information from PBIX files, including tables, metadata, Power Query code, and more.
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
PBIXRay is published on PyPI as `pbixray` and relies on the `apsw` and `kaitaistruct` modules. You can install all three using pip:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install apsw kaitaistruct pbixray
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Getting Started
|
|
44
|
+
To start using PBIXRay, import the module and initialize it with the path to your PBIX file:
|
|
45
|
+
```python
|
|
46
|
+
from pbixray import PBIXRay
|
|
47
|
+
|
|
48
|
+
model = PBIXRay('path/to/your/file.pbix')
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Features and Usage
|
|
52
|
+
### Tables
|
|
53
|
+
To list all tables in the model:
|
|
54
|
+
```python
|
|
55
|
+
tables = model.tables
|
|
56
|
+
print(tables)
|
|
57
|
+
```
|
|
58
|
+
### Metadata
|
|
59
|
+
To get metadata about the Power BI configuration used during model creation:
|
|
60
|
+
```python
|
|
61
|
+
metadata = model.metadata
|
|
62
|
+
print(metadata)
|
|
63
|
+
```
|
|
64
|
+
### Power Query
|
|
65
|
+
To display all M/Power Query code used for data transformation, in a dataframe with `TableName` and `Expression` columns:
|
|
66
|
+
```python
|
|
67
|
+
power_query = model.power_query
|
|
68
|
+
print(power_query)
|
|
69
|
+
```
|
|
70
|
+
### Model Size
|
|
71
|
+
To find out the model size in bytes:
|
|
72
|
+
```python
|
|
73
|
+
size = model.size
|
|
74
|
+
print(f"Model size: {size} bytes")
|
|
75
|
+
```
|
|
76
|
+
### DAX Calculated Tables
|
|
77
|
+
To view DAX calculated tables in a dataframe with `TableName` and `Expression` columns:
|
|
78
|
+
```python
|
|
79
|
+
dax_tables = model.dax_tables
|
|
80
|
+
print(dax_tables)
|
|
81
|
+
```
|
|
82
|
+
### DAX Measures
|
|
83
|
+
To access DAX measures in a dataframe with `TableName`, `Name`, `Expression`, `DisplayFolder`, and `Description` columns:
|
|
84
|
+
```python
|
|
85
|
+
dax_measures = model.dax_measures
|
|
86
|
+
print(dax_measures)
|
|
87
|
+
```
|
|
88
|
+
### Schema
|
|
89
|
+
To get details about the data model schema and column types in a dataframe with `TableName`, `ColumnName`, and `PandasDataType` columns:
|
|
90
|
+
```python
|
|
91
|
+
schema = model.schema
|
|
92
|
+
print(schema)
|
|
93
|
+
```
|
|
94
|
+
### Get Table Contents
|
|
95
|
+
To retrieve the contents of a specified table:
|
|
96
|
+
```python
|
|
97
|
+
table_name = 'YourTableName'
|
|
98
|
+
table_contents = model.get_table(table_name)
|
|
99
|
+
print(table_contents)
|
|
100
|
+
```
|
|
101
|
+
### Statistics
|
|
102
|
+
To get statistics about the model, including column cardinality and byte sizes of dictionary, hash index, and data components, in a dataframe with columns `TableName`, `ColumnName`, `Cardinality`, `Dictionary`, `HashIndex`, and `DataSize`:
|
|
103
|
+
```python
|
|
104
|
+
statistics = model.statistics
|
|
105
|
+
print(statistics)
|
|
106
|
+
```
|
pbixray-0.1.12/README.md
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# PBIXRay
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
PBIXRay is a Python library designed to parse and analyze PBIX files, which are used with Microsoft Power BI. This library provides a straightforward way to extract valuable information from PBIX files, including tables, metadata, Power Query code, and more.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
PBIXRay is published on PyPI as `pbixray` and relies on the `apsw` and `kaitaistruct` modules. You can install all three using pip:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install apsw kaitaistruct pbixray
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Getting Started
|
|
16
|
+
To start using PBIXRay, import the module and initialize it with the path to your PBIX file:
|
|
17
|
+
```python
|
|
18
|
+
from pbixray import PBIXRay
|
|
19
|
+
|
|
20
|
+
model = PBIXRay('path/to/your/file.pbix')
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Features and Usage
|
|
24
|
+
### Tables
|
|
25
|
+
To list all tables in the model:
|
|
26
|
+
```python
|
|
27
|
+
tables = model.tables
|
|
28
|
+
print(tables)
|
|
29
|
+
```
|
|
30
|
+
### Metadata
|
|
31
|
+
To get metadata about the Power BI configuration used during model creation:
|
|
32
|
+
```python
|
|
33
|
+
metadata = model.metadata
|
|
34
|
+
print(metadata)
|
|
35
|
+
```
|
|
36
|
+
### Power Query
|
|
37
|
+
To display all M/Power Query code used for data transformation, in a dataframe with `TableName` and `Expression` columns:
|
|
38
|
+
```python
|
|
39
|
+
power_query = model.power_query
|
|
40
|
+
print(power_query)
|
|
41
|
+
```
|
|
42
|
+
### Model Size
|
|
43
|
+
To find out the model size in bytes:
|
|
44
|
+
```python
|
|
45
|
+
size = model.size
|
|
46
|
+
print(f"Model size: {size} bytes")
|
|
47
|
+
```
|
|
48
|
+
### DAX Calculated Tables
|
|
49
|
+
To view DAX calculated tables in a dataframe with `TableName` and `Expression` columns:
|
|
50
|
+
```python
|
|
51
|
+
dax_tables = model.dax_tables
|
|
52
|
+
print(dax_tables)
|
|
53
|
+
```
|
|
54
|
+
### DAX Measures
|
|
55
|
+
To access DAX measures in a dataframe with `TableName`, `Name`, `Expression`, `DisplayFolder`, and `Description` columns:
|
|
56
|
+
```python
|
|
57
|
+
dax_measures = model.dax_measures
|
|
58
|
+
print(dax_measures)
|
|
59
|
+
```
|
|
60
|
+
### Schema
|
|
61
|
+
To get details about the data model schema and column types in a dataframe with `TableName`, `ColumnName`, and `PandasDataType` columns:
|
|
62
|
+
```python
|
|
63
|
+
schema = model.schema
|
|
64
|
+
print(schema)
|
|
65
|
+
```
|
|
66
|
+
### Get Table Contents
|
|
67
|
+
To retrieve the contents of a specified table:
|
|
68
|
+
```python
|
|
69
|
+
table_name = 'YourTableName'
|
|
70
|
+
table_contents = model.get_table(table_name)
|
|
71
|
+
print(table_contents)
|
|
72
|
+
```
|
|
73
|
+
### Statistics
|
|
74
|
+
To get statistics about the model, including column cardinality and byte sizes of dictionary, hash index, and data components, in a dataframe with columns `TableName`, `ColumnName`, `Cardinality`, `Dictionary`, `HashIndex`, and `DataSize`:
|
|
75
|
+
```python
|
|
76
|
+
statistics = model.statistics
|
|
77
|
+
print(statistics)
|
|
78
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .core import PBIXRay
|
|
File without changes
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import xml.etree.ElementTree as ET
|
|
2
|
+
|
|
3
|
+
class BackupLog:
    """Backup log decoded from a UTF-16 encoded XML payload.

    Parameters
    ----------
    xml_string : bytes
        Raw, UTF-16 encoded XML document.
    error_code : bool
        When truthy, the last 4 bytes of ``xml_string`` are trimmed before
        the payload is decoded.

    Text fields are stored as returned by ``findtext`` (``None`` when the
    element is absent); flag fields are ``True`` only when the element text
    is exactly ``"true"``.
    """

    def __init__(self, xml_string, error_code):
        # If error_code is set, trim the last 4 bytes before decoding.
        payload = xml_string[:-4] if error_code else xml_string
        root = ET.fromstring(payload.decode('utf-16'))
        text_of = root.findtext

        def is_true(tag):
            return text_of(tag) == "true"

        # Plain text fields.
        self.BackupRestoreSyncVersion = text_of("BackupRestoreSyncVersion")
        self.ServerRoot = text_of("ServerRoot")
        self.ObjectName = text_of("ObjectName")
        self.ObjectId = text_of("ObjectId")
        self.Write = text_of("Write")

        # Boolean flags.
        self.SvrEncryptPwdFlag = is_true("SvrEncryptPwdFlag")
        self.ServerEnableBinaryXML = is_true("ServerEnableBinaryXML")
        self.ServerEnableCompression = is_true("ServerEnableCompression")
        self.CompressionFlag = is_true("CompressionFlag")
        self.EncryptionFlag = is_true("EncryptionFlag")
        self.OlapInfo = is_true("OlapInfo")

        # Repeated child elements.
        self.Collations = [node.text for node in root.iterfind("Collations/Collation")]
        self.Languages = [int(node.text) for node in root.iterfind("Languages/Language")]
        self.FileGroups = [
            FileGroup.from_xml(node)
            for node in root.iterfind("FileGroups/FileGroup")
        ]
|
|
24
|
+
|
|
25
|
+
class FileGroup:
    """One ``FileGroup`` entry of the backup log."""

    @classmethod
    def from_xml(cls, element):
        """Build a ``FileGroup`` from its XML element.

        ``Class``, ``ObjectVersion`` and ``PersistLocation`` are converted to
        ``int``; the remaining scalar fields keep the text returned by
        ``findtext``. ``FileList`` holds one ``BackupFile`` per
        ``FileList/BackupFile`` child.
        """
        text_of = element.findtext
        group = cls()

        for tag in ("Class", "ObjectVersion", "PersistLocation"):
            setattr(group, tag, int(text_of(tag)))

        for tag in ("ID", "Name", "PersistLocationPath",
                    "StorageLocationPath", "ObjectID"):
            setattr(group, tag, text_of(tag))

        group.FileList = [
            BackupFile.from_xml(node)
            for node in element.iterfind("FileList/BackupFile")
        ]
        return group
|
|
39
|
+
|
|
40
|
+
class BackupFile:
    """One ``BackupFile`` entry listed inside a file group's ``FileList``."""

    @classmethod
    def from_xml(cls, element):
        """Build a ``BackupFile`` from its XML element.

        ``Path`` and ``StoragePath`` keep the element text; ``LastWriteTime``
        and ``Size`` are converted to ``int``.
        """
        text_of = element.findtext
        entry = cls()
        entry.Path, entry.StoragePath = text_of("Path"), text_of("StoragePath")
        entry.LastWriteTime = int(text_of("LastWriteTime"))
        entry.Size = int(text_of("Size"))
        return entry
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import xml.etree.ElementTree as ET
|
|
2
|
+
|
|
3
|
+
class BackupLogHeader:
    """Header of the backup log, parsed from its XML representation.

    Parameters
    ----------
    xml_string : bytes or str
        Either the raw UTF-16 encoded header bytes or already-decoded text.
        Trailing NUL characters are stripped before the XML is parsed.

    Raises
    ------
    TypeError
        If one of the integer-valued elements is missing from the document
        (``int(None)``).
    xml.etree.ElementTree.ParseError
        If the text is not well-formed XML.
    """

    def __init__(self, xml_string):
        # header is always one page (4096 bytes) in size and padded with
        # zeros between the last header element and the end of the page
        if isinstance(xml_string, (bytes, bytearray, memoryview)):
            xml_string = bytes(xml_string).decode('utf-16')
        root = ET.fromstring(xml_string.rstrip('\x00'))
        text_of = root.findtext

        self.BackupRestoreSyncVersion = int(text_of("BackupRestoreSyncVersion"))
        self.Fault = text_of("Fault") == "true"
        self.faultcode = int(text_of("faultcode"))
        self.ErrorCode = text_of("ErrorCode") == "true"
        self.EncryptionFlag = text_of("EncryptionFlag") == "true"
        self.EncryptionKey = int(text_of("EncryptionKey"))
        self.ApplyCompression = text_of("ApplyCompression") == "true"
        self.m_cbOffsetHeader = int(text_of("m_cbOffsetHeader"))
        self.DataSize = int(text_of("DataSize"))
        self.Files = int(text_of("Files"))
        self.ObjectID = text_of("ObjectID")
        self.m_cbOffsetData = int(text_of("m_cbOffsetData"))

    @classmethod
    def from_xml_string(cls, xml_string):
        """Alternate constructor; accepts the same input as ``__init__``.

        Decoding is left to the constructor: decoding here as well would hand
        it a ``str`` and make a second ``.decode`` call fail.
        """
        return cls(xml_string)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import xml.etree.ElementTree as ET
|
|
2
|
+
from .backup_log import BackupLog
|
|
3
|
+
from .backup_log_header import BackupLogHeader
|
|
4
|
+
from .virtual_directory import VirtualDirectory
|
|
5
|
+
from .data_model import DataModel
|
|
6
|
+
|
|
7
|
+
class AbfParser:
    """Parse the decompressed buffer of a ``DataModel`` into a file log.

    Construction runs the whole pipeline: backup log header, virtual
    directory, backup log, and finally the matching of backup-log files with
    virtual-directory entries.

    Side effects on ``data_model``:
      * ``error_code`` is set from the header's ``ErrorCode`` flag.
      * ``file_log`` is set to a list of dicts with the keys ``Path``,
        ``FileName``, ``StoragePath``, ``Size`` and ``m_cbOffsetHeader``.

    The same list is exposed on the parser as the public ``file_log``
    attribute.
    """

    def __init__(self, data_model: "DataModel"):
        self.data_model = data_model
        self.__buffer = data_model.decompressed_data
        self.__backup_log_header = None
        self.__virtual_directory = None
        self.__backup_log = None
        self.file_log = None  # public attribute, populated by the parse

        # Trigger the parsing process upon initialization
        self.__parse_all()

    def __parse_all(self):
        """Run every parsing stage in dependency order."""
        self.__parse_backup_log_header()
        self.__parse_virtual_directory()
        self.__parse_backup_log()
        self.__match_logs_and_get_attributes()

    def __parse_backup_log_header(self):
        """Parse the header found in the first page of the buffer."""
        offset = 72  # STREAM_STORAGE_SIGNATURE_)!@#$%^&*(
        page = 0x1000  # header is always one page (4096 bytes) in size
        self.__backup_log_header = BackupLogHeader(self.__buffer[offset:page])

    def __parse_virtual_directory(self):
        """Parse the virtual directory located via the header's offset/size."""
        offset = int(self.__backup_log_header.m_cbOffsetHeader)
        size = int(self.__backup_log_header.DataSize)
        self.__virtual_directory = VirtualDirectory(self.__buffer[offset:offset + size])

    def __parse_backup_log(self):
        """Parse the backup log, taken to be the last virtual-directory entry."""
        log = self.__virtual_directory.BackupFiles[-1]
        offset = int(log.m_cbOffsetHeader)
        size = int(log.Size)
        error_code = self.__backup_log_header.ErrorCode
        self.data_model.error_code = error_code
        self.__backup_log = BackupLog(self.__buffer[offset:offset + size], error_code)

    def __match_logs_and_get_attributes(self):
        """Join backup-log files with virtual-directory entries by storage path.

        Backup-log files whose ``StoragePath`` has no virtual-directory entry
        are skipped. The result is stored on both ``self.data_model.file_log``
        and ``self.file_log``.
        """
        # NOTE(review): the persist root is read from the second file group;
        # this raises IndexError when fewer than two groups exist - confirm
        # that the format always provides at least two.
        persist_root = self.__backup_log.FileGroups[1].PersistLocationPath + '\\'
        virtual_directory_files_by_path = {
            file.Path: file for file in self.__virtual_directory.BackupFiles
        }
        matched_data = []

        for file_group in self.__backup_log.FileGroups:
            for backup_file in file_group.FileList:
                matched_file = virtual_directory_files_by_path.get(backup_file.StoragePath)
                if matched_file is None:
                    continue
                if backup_file.Path.startswith(persist_root):
                    # Drop the leading persist root so paths are relative.
                    path_without_persist_root = backup_file.Path[len(persist_root):]
                else:
                    path_without_persist_root = backup_file.Path
                matched_data.append({
                    'Path': path_without_persist_root,
                    'FileName': path_without_persist_root.split('\\')[-1],
                    'StoragePath': backup_file.StoragePath,
                    'Size': matched_file.Size,
                    'm_cbOffsetHeader': matched_file.m_cbOffsetHeader,
                })

        self.data_model.file_log = matched_data
        # Previously this public attribute was declared but never filled in.
        self.file_log = matched_data
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import xml.etree.ElementTree as ET
|
|
2
|
+
|
|
3
|
+
class VirtualDirectoryBackupFile:
    """One ``BackupFile`` entry of the virtual directory.

    ``Path`` keeps the element text, ``Delete`` is ``True`` only when the
    element text is exactly ``"true"``, and every other field is converted
    to ``int``.
    """

    def __init__(self, element):
        text_of = element.findtext
        self.Path = text_of("Path")
        self.Delete = text_of("Delete") == "true"
        for tag in ("Size", "m_cbOffsetHeader", "CreatedTimestamp",
                    "Access", "LastWriteTime"):
            setattr(self, tag, int(text_of(tag)))
|
|
12
|
+
|
|
13
|
+
class VirtualDirectory:
    """Virtual directory: the list of ``BackupFile`` entries under the XML root."""

    def __init__(self, xml_string):
        document = ET.fromstring(xml_string)
        # Only direct ``BackupFile`` children of the root are collected.
        self.BackupFiles = [
            VirtualDirectoryBackupFile(node)
            for node in document.iterfind("BackupFile")
        ]

    @classmethod
    def from_xml_string(cls, xml_string):
        """Alternate constructor; equivalent to calling the class directly."""
        return cls(xml_string)
|
|
File without changes
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
|
|
2
|
+
|
|
3
|
+
import kaitaistruct
|
|
4
|
+
from kaitaistruct import KaitaiStruct, KaitaiStream, BytesIO
|
|
5
|
+
from enum import Enum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Refuse to load against a Kaitai Struct runtime older than API 0.9; a runtime
# that does not expose API_VERSION at all is treated as (0, 9) and accepted.
if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 9):
    raise Exception(
        "Incompatible Kaitai Struct Python API: 0.9 or later is required, but you have %s"
        % kaitaistruct.__version__
    )
|
|
10
|
+
|
|
11
|
+
class ColumnDataDictionary(KaitaiStruct):
    """Kaitai Struct parser for a column dictionary stream.

    The stream starts with a signed 32-bit dictionary type, followed by hash
    information and a type-dependent ``data`` payload: ``StringData`` for
    string dictionaries, ``NumberData`` for long and real dictionaries.
    ``data`` is left unset for any other type value.

    NOTE: this module is generated from a .ksy specification; lasting changes
    belong in that source file.
    """

    class DictionaryTypes(Enum):
        xm_type_invalid = -1
        xm_type_long = 0
        xm_type_real = 1
        xm_type_string = 2

    def __init__(self, _io, _parent=None, _root=None):
        self._io = _io
        self._parent = _parent
        self._root = _root if _root else self
        self._read()

    def _read(self):
        """Read the type tag, the hash information and the typed payload."""
        kinds = ColumnDataDictionary.DictionaryTypes
        self.dictionary_type = KaitaiStream.resolve_enum(kinds, self._io.read_s4le())
        self.hash_information = ColumnDataDictionary.HashInfo(self._io, self, self._root)
        kind = self.dictionary_type
        if kind == kinds.xm_type_string:
            self.data = ColumnDataDictionary.StringData(self._io, self, self._root)
        elif kind == kinds.xm_type_long or kind == kinds.xm_type_real:
            self.data = ColumnDataDictionary.NumberData(self._io, self, self._root)

    class StringRecordHandle(KaitaiStruct):
        """Record handle made of two unsigned 32-bit values."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.bit_or_byte_offset = self._io.read_u4le()
            self.page_id = self._io.read_u4le()

    class StringData(KaitaiStruct):
        """String payload: page layout, dictionary pages, record handles."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.page_layout_information = ColumnDataDictionary.PageLayout(self._io, self, self._root)
            # One DictionaryPage per page announced by the layout.
            self.dictionary_pages = [
                ColumnDataDictionary.DictionaryPage(self._io, self, self._root)
                for _ in range(self.page_layout_information.store_page_count)
            ]
            self.dictionary_record_handles_vector_info = (
                ColumnDataDictionary.DictionaryRecordHandlesVector(self._io, self, self._root)
            )

    class HashInfo(KaitaiStruct):
        """Six consecutive signed 32-bit hash elements."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.hash_elements = [self._io.read_s4le() for _ in range(6)]

    class VectorOfVectors(KaitaiStruct):
        """Counted vector of int32, int64 or float64 values.

        The element type is derived from ``element_size`` and, for 8-byte
        elements, from the root's ``dictionary_type``.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.element_count = self._io.read_u8le()
            self.element_size = self._io.read_u4le()
            readers = {
                u"int32": self._io.read_s4le,
                u"int64": self._io.read_s8le,
                u"float64": self._io.read_f8le,
            }
            self.values = []
            for _ in range(self.element_count):
                # data_type_id is resolved lazily and cached on first use.
                self.values.append(readers[self.data_type_id]())

        @property
        def is_int32(self):
            if not hasattr(self, '_m_is_int32'):
                self._m_is_int32 = self.element_size == 4
            return self._m_is_int32

        @property
        def is_int64(self):
            if not hasattr(self, '_m_is_int64'):
                self._m_is_int64 = (
                    (self.element_size == 8)
                    and (self._root.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_long)
                )
            return self._m_is_int64

        @property
        def is_float64(self):
            if not hasattr(self, '_m_is_float64'):
                self._m_is_float64 = (
                    (self.element_size == 8)
                    and (self._root.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_real)
                )
            return self._m_is_float64

        @property
        def data_type_id(self):
            if not hasattr(self, '_m_data_type_id'):
                if self.is_int32:
                    self._m_data_type_id = u"int32"
                elif self.is_int64:
                    self._m_data_type_id = u"int64"
                else:
                    self._m_data_type_id = u"float64"
            return self._m_data_type_id

    class CompressedStrings(KaitaiStruct):
        """Compressed string store: header, 128-byte encode array, raw buffer."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.store_total_bits = self._io.read_u4le()
            self.character_set_type_identifier = self._io.read_u4le()
            self.allocation_size = self._io.read_u8le()
            self.character_set_used = self._io.read_u1()
            self.ui_decode_bits = self._io.read_u4le()
            self.encode_array = [self._io.read_u1() for _ in range(128)]
            self.ui64_buffer_size = self._io.read_u8le()
            # The buffer length comes from allocation_size, not ui64_buffer_size.
            self.compressed_string_buffer = self._io.read_bytes(self.allocation_size)

    class PageLayout(KaitaiStruct):
        """Layout header: string count, compressed flag, longest string, page count."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.store_string_count = self._io.read_s8le()
            self.f_store_compressed = self._io.read_s1()
            self.store_longest_string = self._io.read_s8le()
            self.store_page_count = self._io.read_s8le()

    class DictionaryPage(KaitaiStruct):
        """One dictionary page; its string store is framed by fixed markers.

        ``string_store`` is ``UncompressedStrings`` when ``page_compressed`` is
        0 and ``CompressedStrings`` when it is 1; it is left unset for any
        other value.

        Raises ``kaitaistruct.ValidationNotEqualError`` when either marker
        differs from its expected bytes.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.page_mask = self._io.read_u8le()
            self.page_contains_nulls = self._io.read_u1()
            self.page_start_index = self._io.read_u8le()
            self.page_string_count = self._io.read_u8le()
            self.page_compressed = self._io.read_u1()

            self.string_store_begin_mark = self._io.read_bytes(4)
            if self.string_store_begin_mark != b"\xDD\xCC\xBB\xAA":
                raise kaitaistruct.ValidationNotEqualError(
                    b"\xDD\xCC\xBB\xAA", self.string_store_begin_mark,
                    self._io, u"/types/dictionary_page/seq/5")

            compressed = self.page_compressed
            if compressed == 0:
                self.string_store = ColumnDataDictionary.UncompressedStrings(self._io, self, self._root)
            elif compressed == 1:
                self.string_store = ColumnDataDictionary.CompressedStrings(self._io, self, self._root)

            self.string_store_end_mark = self._io.read_bytes(4)
            if self.string_store_end_mark != b"\xCD\xAB\xCD\xAB":
                raise kaitaistruct.ValidationNotEqualError(
                    b"\xCD\xAB\xCD\xAB", self.string_store_end_mark,
                    self._io, u"/types/dictionary_page/seq/7")

    class OtherRecordHandle(KaitaiStruct):
        """Record handle holding a single unsigned 32-bit offset."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.bit_or_byte_offset = self._io.read_u4le()

    class UncompressedStrings(KaitaiStruct):
        """Uncompressed string store decoded as UTF-16LE text."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.remaining_store_available = self._io.read_u8le()
            self.buffer_used_characters = self._io.read_u8le()
            self.allocation_size = self._io.read_u8le()
            raw_buffer = self._io.read_bytes(self.allocation_size)
            self.uncompressed_character_buffer = raw_buffer.decode(u"UTF-16LE")

    class NumberData(KaitaiStruct):
        """Numeric payload: a single ``VectorOfVectors``."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.vector_of_vectors_info = ColumnDataDictionary.VectorOfVectors(self._io, self, self._root)

    class DictionaryRecordHandlesVector(KaitaiStruct):
        """Counted vector of ``StringRecordHandle`` structures.

        Raises ``kaitaistruct.ValidationNotEqualError`` unless the 4-byte
        element size field equals ``08 00 00 00``.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.element_count = self._io.read_u8le()
            self.element_size = self._io.read_bytes(4)
            if self.element_size != b"\x08\x00\x00\x00":
                raise kaitaistruct.ValidationNotEqualError(
                    b"\x08\x00\x00\x00", self.element_size,
                    self._io, u"/types/dictionary_record_handles_vector/seq/1")
            self.vector_of_record_handle_structures = [
                ColumnDataDictionary.StringRecordHandle(self._io, self, self._root)
                for _ in range(self.element_count)
            ]
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
|