pbixray 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. pbixray-0.1.12/MANIFEST.in +3 -0
  2. pbixray-0.1.12/PKG-INFO +106 -0
  3. pbixray-0.1.12/README.md +78 -0
  4. pbixray-0.1.12/pbixray/__init__.py +1 -0
  5. pbixray-0.1.12/pbixray/abf/__init__.py +0 -0
  6. pbixray-0.1.12/pbixray/abf/backup_log.py +48 -0
  7. pbixray-0.1.12/pbixray/abf/backup_log_header.py +24 -0
  8. pbixray-0.1.12/pbixray/abf/data_model.py +7 -0
  9. pbixray-0.1.12/pbixray/abf/parser.py +63 -0
  10. pbixray-0.1.12/pbixray/abf/virtual_directory.py +20 -0
  11. pbixray-0.1.12/pbixray/column_data/__init__.py +0 -0
  12. pbixray-0.1.12/pbixray/column_data/dictionary.py +246 -0
  13. pbixray-0.1.12/pbixray/column_data/hidx.py +97 -0
  14. pbixray-0.1.12/pbixray/column_data/idf.py +55 -0
  15. pbixray-0.1.12/pbixray/column_data/idfmeta.py +183 -0
  16. pbixray-0.1.12/pbixray/core.py +52 -0
  17. pbixray-0.1.12/pbixray/huffman.py +79 -0
  18. pbixray-0.1.12/pbixray/lib/libxpress9.dll +0 -0
  19. pbixray-0.1.12/pbixray/lib/libxpress9.dylib +0 -0
  20. pbixray-0.1.12/pbixray/lib/libxpress9.so +0 -0
  21. pbixray-0.1.12/pbixray/meta/__init__.py +0 -0
  22. pbixray-0.1.12/pbixray/meta/metadata_handler.py +57 -0
  23. pbixray-0.1.12/pbixray/meta/metadata_query.py +85 -0
  24. pbixray-0.1.12/pbixray/meta/sqlite_handler.py +30 -0
  25. pbixray-0.1.12/pbixray/pbix_unpacker.py +99 -0
  26. pbixray-0.1.12/pbixray/utils.py +24 -0
  27. pbixray-0.1.12/pbixray/vertipaq_decoder.py +190 -0
  28. pbixray-0.1.12/pbixray.egg-info/PKG-INFO +106 -0
  29. pbixray-0.1.12/pbixray.egg-info/SOURCES.txt +33 -0
  30. pbixray-0.1.12/pbixray.egg-info/dependency_links.txt +1 -0
  31. pbixray-0.1.12/pbixray.egg-info/requires.txt +3 -0
  32. pbixray-0.1.12/pbixray.egg-info/top_level.txt +1 -0
  33. pbixray-0.1.12/setup.cfg +4 -0
  34. pbixray-0.1.12/setup.py +40 -0
  35. pbixray-0.1.12/test/test_basic_operation.py +27 -0
@@ -0,0 +1,3 @@
1
+ include pbixray/lib/libxpress9.dll
2
+ include pbixray/lib/libxpress9.so
3
+ include pbixray/lib/libxpress9.dylib
@@ -0,0 +1,106 @@
1
+ Metadata-Version: 2.1
2
+ Name: pbixray
3
+ Version: 0.1.12
4
+ Summary: A Python library to parse and analyze PBIX files used with Microsoft Power BI.
5
+ Home-page: https://github.com/Hugoberry/pbixray
6
+ Author: Igor Cotruta
7
+ License: MIT
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Intended Audience :: Information Technology
12
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
14
+ Classifier: Topic :: Scientific/Engineering :: Visualization
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.6
18
+ Classifier: Programming Language :: Python :: 3.7
19
+ Classifier: Programming Language :: Python :: 3.8
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Operating System :: OS Independent
23
+ Requires-Python: >=3.6
24
+ Description-Content-Type: text/markdown
25
+ Requires-Dist: kaitaistruct
26
+ Requires-Dist: pandas
27
+ Requires-Dist: apsw
28
+
29
+ # PBIXRay
30
+
31
+ ## Overview
32
+
33
+ PBIXRay is a Python library designed to parse and analyze PBIX files, which are used with Microsoft Power BI. This library provides a straightforward way to extract valuable information from PBIX files, including tables, metadata, Power Query code, and more.
34
+
35
+ ## Installation
36
+
37
+ Before using PBIXRay, make sure the `pbixray` package and its dependencies `apsw` and `kaitaistruct` are installed. You can install them all with pip:
38
+
39
+ ```bash
40
+ pip install apsw kaitaistruct pbixray
41
+ ```
42
+
43
+ ## Getting Started
44
+ To start using PBIXRay, import the module and initialize it with the path to your PBIX file:
45
+ ```python
46
+ from pbixray import PBIXRay
47
+
48
+ model = PBIXRay('path/to/your/file.pbix')
49
+ ```
50
+
51
+ ## Features and Usage
52
+ ### Tables
53
+ To list all tables in the model:
54
+ ```python
55
+ tables = model.tables
56
+ print(tables)
57
+ ```
58
+ ### Metadata
59
+ To get metadata about the Power BI configuration used during model creation:
60
+ ```python
61
+ metadata = model.metadata
62
+ print(metadata)
63
+ ```
64
+ ### Power Query
65
+ To display all M/Power Query code used for data transformation, in a dataframe with `TableName` and `Expression` columns:
66
+ ```python
67
+ power_query = model.power_query
68
+ print(power_query)
69
+ ```
70
+ ### Model Size
71
+ To find out the model size in bytes:
72
+ ```python
73
+ size = model.size
74
+ print(f"Model size: {size} bytes")
75
+ ```
76
+ ### DAX Calculated Tables
77
+ To view DAX calculated tables in a dataframe with `TableName` and `Expression` columns:
78
+ ```python
79
+ dax_tables = model.dax_tables
80
+ print(dax_tables)
81
+ ```
82
+ ### DAX Measures
83
+ To access DAX measures in a dataframe with `TableName`, `Name`, `Expression`, `DisplayFolder`, and `Description` columns:
84
+ ```python
85
+ dax_measures = model.dax_measures
86
+ print(dax_measures)
87
+ ```
88
+ ### Schema
89
+ To get details about the data model schema and column types in a dataframe with `TableName`, `ColumnName`, and `PandasDataType` columns:
90
+ ```python
91
+ schema = model.schema
92
+ print(schema)
93
+ ```
94
+ ### Get Table Contents
95
+ To retrieve the contents of a specified table:
96
+ ```python
97
+ table_name = 'YourTableName'
98
+ table_contents = model.get_table(table_name)
99
+ print(table_contents)
100
+ ```
101
+ ### Statistics
102
+ To get statistics about the model, including column cardinality and byte sizes of dictionary, hash index, and data components, in a dataframe with columns `TableName`, `ColumnName`, `Cardinality`, `Dictionary`, `HashIndex`, and `DataSize`:
103
+ ```python
104
+ statistics = model.statistics
105
+ print(statistics)
106
+ ```
@@ -0,0 +1,78 @@
1
+ # PBIXRay
2
+
3
+ ## Overview
4
+
5
+ PBIXRay is a Python library designed to parse and analyze PBIX files, which are used with Microsoft Power BI. This library provides a straightforward way to extract valuable information from PBIX files, including tables, metadata, Power Query code, and more.
6
+
7
+ ## Installation
8
+
9
+ Before using PBIXRay, make sure the `pbixray` package and its dependencies `apsw` and `kaitaistruct` are installed. You can install them all with pip:
10
+
11
+ ```bash
12
+ pip install apsw kaitaistruct pbixray
13
+ ```
14
+
15
+ ## Getting Started
16
+ To start using PBIXRay, import the module and initialize it with the path to your PBIX file:
17
+ ```python
18
+ from pbixray import PBIXRay
19
+
20
+ model = PBIXRay('path/to/your/file.pbix')
21
+ ```
22
+
23
+ ## Features and Usage
24
+ ### Tables
25
+ To list all tables in the model:
26
+ ```python
27
+ tables = model.tables
28
+ print(tables)
29
+ ```
30
+ ### Metadata
31
+ To get metadata about the Power BI configuration used during model creation:
32
+ ```python
33
+ metadata = model.metadata
34
+ print(metadata)
35
+ ```
36
+ ### Power Query
37
+ To display all M/Power Query code used for data transformation, in a dataframe with `TableName` and `Expression` columns:
38
+ ```python
39
+ power_query = model.power_query
40
+ print(power_query)
41
+ ```
42
+ ### Model Size
43
+ To find out the model size in bytes:
44
+ ```python
45
+ size = model.size
46
+ print(f"Model size: {size} bytes")
47
+ ```
48
+ ### DAX Calculated Tables
49
+ To view DAX calculated tables in a dataframe with `TableName` and `Expression` columns:
50
+ ```python
51
+ dax_tables = model.dax_tables
52
+ print(dax_tables)
53
+ ```
54
+ ### DAX Measures
55
+ To access DAX measures in a dataframe with `TableName`, `Name`, `Expression`, `DisplayFolder`, and `Description` columns:
56
+ ```python
57
+ dax_measures = model.dax_measures
58
+ print(dax_measures)
59
+ ```
60
+ ### Schema
61
+ To get details about the data model schema and column types in a dataframe with `TableName`, `ColumnName`, and `PandasDataType` columns:
62
+ ```python
63
+ schema = model.schema
64
+ print(schema)
65
+ ```
66
+ ### Get Table Contents
67
+ To retrieve the contents of a specified table:
68
+ ```python
69
+ table_name = 'YourTableName'
70
+ table_contents = model.get_table(table_name)
71
+ print(table_contents)
72
+ ```
73
+ ### Statistics
74
+ To get statistics about the model, including column cardinality and byte sizes of dictionary, hash index, and data components, in a dataframe with columns `TableName`, `ColumnName`, `Cardinality`, `Dictionary`, `HashIndex`, and `DataSize`:
75
+ ```python
76
+ statistics = model.statistics
77
+ print(statistics)
78
+ ```
@@ -0,0 +1 @@
1
+ from .core import PBIXRay
File without changes
@@ -0,0 +1,48 @@
1
+ import xml.etree.ElementTree as ET
2
+
3
class BackupLog:
    """Attribute view of the backup-log XML document.

    Args:
        xml_string: UTF-16 encoded XML as a bytes-like object.
        error_code: When truthy, the final four bytes of ``xml_string`` are
            discarded before decoding.

    Boolean-style attributes are ``True`` only when the element text is
    exactly ``"true"``; a missing element therefore yields ``False``.
    Plain text attributes are ``None`` when the element is missing.
    """

    def __init__(self, xml_string, error_code):
        # Drop the 4 trailing bytes first when the error-code flag is set.
        payload = xml_string[:-4] if error_code else xml_string
        root = ET.fromstring(payload.decode('utf-16'))
        text_of = root.findtext

        def is_true(tag):
            return text_of(tag) == "true"

        self.BackupRestoreSyncVersion = text_of("BackupRestoreSyncVersion")
        self.ServerRoot = text_of("ServerRoot")
        self.SvrEncryptPwdFlag = is_true("SvrEncryptPwdFlag")
        self.ServerEnableBinaryXML = is_true("ServerEnableBinaryXML")
        self.ServerEnableCompression = is_true("ServerEnableCompression")
        self.CompressionFlag = is_true("CompressionFlag")
        self.EncryptionFlag = is_true("EncryptionFlag")
        self.ObjectName = text_of("ObjectName")
        self.ObjectId = text_of("ObjectId")
        self.Write = text_of("Write")
        self.OlapInfo = is_true("OlapInfo")
        self.Collations = [node.text for node in root.findall("Collations/Collation")]
        self.Languages = [int(node.text) for node in root.findall("Languages/Language")]
        self.FileGroups = [FileGroup.from_xml(node) for node in root.findall("FileGroups/FileGroup")]
24
+
25
class FileGroup:
    """One ``FileGroup`` element of the backup log, exposed as attributes."""

    @classmethod
    def from_xml(cls, element):
        """Build a FileGroup from an ElementTree ``FileGroup`` element.

        ``Class``, ``ObjectVersion`` and ``PersistLocation`` are converted
        with ``int``; the other scalar children keep their text (``None``
        when absent). ``FileList`` holds one BackupFile per
        ``FileList/BackupFile`` child.
        """
        numeric_tags = {"Class", "ObjectVersion", "PersistLocation"}
        scalar_tags = (
            "Class",
            "ID",
            "Name",
            "ObjectVersion",
            "PersistLocation",
            "PersistLocationPath",
            "StorageLocationPath",
            "ObjectID",
        )

        group = cls()
        for tag in scalar_tags:
            raw = element.findtext(tag)
            setattr(group, tag, int(raw) if tag in numeric_tags else raw)

        group.FileList = [BackupFile.from_xml(node) for node in element.findall("FileList/BackupFile")]
        return group
39
+
40
class BackupFile:
    """One ``BackupFile`` entry listed inside a backup-log file group."""

    @classmethod
    def from_xml(cls, element):
        """Build a BackupFile from an ElementTree ``BackupFile`` element.

        ``Path`` and ``StoragePath`` keep their text; ``LastWriteTime`` and
        ``Size`` are converted with ``int``.
        """
        read = element.findtext

        entry = cls()
        entry.Path = read("Path")
        entry.StoragePath = read("StoragePath")
        entry.LastWriteTime = int(read("LastWriteTime"))
        entry.Size = int(read("Size"))
        return entry
@@ -0,0 +1,24 @@
1
+ import xml.etree.ElementTree as ET
2
+
3
class BackupLogHeader:
    """Attribute view of the backup-log header XML.

    Args:
        xml_string: The header page, either as UTF-16 encoded bytes or as an
            already decoded ``str``. Trailing NUL characters are stripped
            before parsing.

    Raises:
        TypeError: If a numeric element is missing (``int(None)``).
        xml.etree.ElementTree.ParseError: If the payload is not valid XML.
    """

    def __init__(self, xml_string):
        # header is always one page (4096 bytes) in size and padded with zeros
        # between the last header element and the end of the page
        if isinstance(xml_string, (bytes, bytearray, memoryview)):
            xml_string = bytes(xml_string).decode('utf-16')
        trimmed_xml_string = xml_string.rstrip('\x00')
        root = ET.fromstring(trimmed_xml_string)
        self.BackupRestoreSyncVersion = int(root.findtext("BackupRestoreSyncVersion"))
        self.Fault = root.findtext("Fault") == "true"
        self.faultcode = int(root.findtext("faultcode"))
        self.ErrorCode = root.findtext("ErrorCode") == "true"
        self.EncryptionFlag = root.findtext("EncryptionFlag") == "true"
        self.EncryptionKey = int(root.findtext("EncryptionKey"))
        self.ApplyCompression = root.findtext("ApplyCompression") == "true"
        self.m_cbOffsetHeader = int(root.findtext("m_cbOffsetHeader"))
        self.DataSize = int(root.findtext("DataSize"))
        self.Files = int(root.findtext("Files"))
        self.ObjectID = root.findtext("ObjectID")
        self.m_cbOffsetData = int(root.findtext("m_cbOffsetData"))

    @classmethod
    def from_xml_string(cls, xml_string):
        """Alternate constructor; accepts the same input as ``__init__``.

        Previously this decoded the bytes and then handed the resulting
        ``str`` to ``__init__``, which tried to decode it a second time and
        raised ``AttributeError``. Decoding now happens in one place only.
        """
        return cls(xml_string)
@@ -0,0 +1,7 @@
1
+ from dataclasses import dataclass
2
+
3
@dataclass
class DataModel:
    """Container passed between the unpacking and parsing stages.

    Fields are positional in declaration order; only ``error_code`` has a
    default.
    """

    # File-log entries (a list; the ABF parser stores its matched entries here).
    file_log: list
    # Decompressed payload bytes that the ABF parser slices into.
    decompressed_data: bytes
    # Copied from the backup-log header's ErrorCode flag by the ABF parser.
    error_code: bool = False
@@ -0,0 +1,63 @@
1
+ import xml.etree.ElementTree as ET
2
+ from .backup_log import BackupLog
3
+ from .backup_log_header import BackupLogHeader
4
+ from .virtual_directory import VirtualDirectory
5
+ from .data_model import DataModel
6
+
7
class AbfParser:
    """Parse the decompressed buffer of a DataModel into a file log.

    Construction runs the whole pipeline: backup-log header, virtual
    directory, backup log, then the matching step. The resulting list of
    dicts is stored on ``data_model.file_log`` and on ``self.file_log``;
    ``data_model.error_code`` is updated from the header's ``ErrorCode``.

    Args:
        data_model: Provides ``decompressed_data`` and receives the results.
    """

    def __init__(self, data_model:DataModel):
        self.data_model = data_model
        self.__buffer = data_model.decompressed_data
        self.__backup_log_header = None
        self.__virtual_directory = None
        self.__backup_log = None
        self.file_log = None  # public attribute, filled in by the matching step

        # Trigger the parsing process upon initialization
        self.__parse_all()

    def __parse_all(self):
        """Run every parsing stage in dependency order."""
        self.__parse_backup_log_header()
        self.__parse_virtual_directory()
        self.__parse_backup_log()
        self.__match_logs_and_get_attributes()

    def __parse_backup_log_header(self):
        """Parse the header XML located between byte 72 and the end of page one."""
        offset = 72  # STREAM_STORAGE_SIGNATURE_)!@#$%^&*(
        page = 0x1000  # header is always one page (4096 bytes) in size
        self.__backup_log_header = BackupLogHeader(self.__buffer[offset:page])

    def __parse_virtual_directory(self):
        """Parse the virtual directory at the offset/size given by the header."""
        offset = int(self.__backup_log_header.m_cbOffsetHeader)
        size = int(self.__backup_log_header.DataSize)
        self.__virtual_directory = VirtualDirectory(self.__buffer[offset: offset+size])

    def __parse_backup_log(self):
        """Parse the backup log, taken to be the LAST virtual-directory entry."""
        log = self.__virtual_directory.BackupFiles[-1]
        offset = int(log.m_cbOffsetHeader)
        size = int(log.Size)
        self.data_model.error_code = self.__backup_log_header.ErrorCode
        self.__backup_log = BackupLog(self.__buffer[offset:offset+size], self.__backup_log_header.ErrorCode)

    def __match_logs_and_get_attributes(self):
        """Join backup-log files with virtual-directory entries by storage path.

        Each match becomes a dict with keys ``Path``, ``FileName``,
        ``StoragePath``, ``Size`` and ``m_cbOffsetHeader``. ``Path`` has the
        persist root (the second file group's ``PersistLocationPath`` plus a
        backslash) stripped when it is a prefix.
        """
        persist_root = self.__backup_log.FileGroups[1].PersistLocationPath + '\\'
        virtual_directory_files_by_path = {file.Path: file for file in self.__virtual_directory.BackupFiles}
        matched_data = []

        for file_group in self.__backup_log.FileGroups:
            for backup_file in file_group.FileList:
                if backup_file.StoragePath not in virtual_directory_files_by_path:
                    continue
                matched_file = virtual_directory_files_by_path[backup_file.StoragePath]
                if backup_file.Path.startswith(persist_root):
                    path_without_persist_root = backup_file.Path[len(persist_root):]
                else:
                    path_without_persist_root = backup_file.Path
                matched_data.append({
                    'Path': path_without_persist_root,
                    'FileName': path_without_persist_root.split('\\')[-1],
                    'StoragePath': backup_file.StoragePath,
                    'Size': matched_file.Size,
                    'm_cbOffsetHeader': matched_file.m_cbOffsetHeader
                })

        self.data_model.file_log = matched_data
        # Previously the public attribute stayed None forever; keep it in sync.
        self.file_log = matched_data
@@ -0,0 +1,20 @@
1
+ import xml.etree.ElementTree as ET
2
+
3
class VirtualDirectoryBackupFile:
    """One ``BackupFile`` element of the virtual directory.

    Args:
        element: ElementTree element whose children supply the fields.
            ``Delete`` is ``True`` only for the exact text ``"true"``; the
            numeric children are converted with ``int``.
    """

    def __init__(self, element):
        def as_int(tag):
            return int(element.findtext(tag))

        self.Path = element.findtext("Path")
        self.Size = as_int("Size")
        self.m_cbOffsetHeader = as_int("m_cbOffsetHeader")
        self.Delete = element.findtext("Delete") == "true"
        self.CreatedTimestamp = as_int("CreatedTimestamp")
        self.Access = as_int("Access")
        self.LastWriteTime = as_int("LastWriteTime")
12
+
13
class VirtualDirectory:
    """Parsed virtual-directory XML.

    ``BackupFiles`` holds one VirtualDirectoryBackupFile per direct
    ``BackupFile`` child of the document root, in document order.
    """

    def __init__(self, xml_string):
        entries = []
        for node in ET.fromstring(xml_string).findall("BackupFile"):
            entries.append(VirtualDirectoryBackupFile(node))
        self.BackupFiles = entries

    @classmethod
    def from_xml_string(cls, xml_string):
        """Alternate constructor; equivalent to calling the class directly."""
        return cls(xml_string)
File without changes
@@ -0,0 +1,246 @@
1
+ # This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
2
+
3
+ import kaitaistruct
4
+ from kaitaistruct import KaitaiStruct, KaitaiStream, BytesIO
5
+ from enum import Enum
6
+
7
+
8
# Import-time guard: refuse to load when the installed kaitaistruct runtime
# reports an API_VERSION older than 0.9. A runtime that does not define
# API_VERSION at all is given the default (0, 9) and therefore passes.
if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 9):
    raise Exception("Incompatible Kaitai Struct Python API: 0.9 or later is required, but you have %s" % (kaitaistruct.__version__))
10
+
11
class ColumnDataDictionary(KaitaiStruct):
    """Kaitai Struct reader for a column-data dictionary stream.

    NOTE: the file header says this module is generated by
    kaitai-struct-compiler from a .ksy source, so comments added here will
    be lost on regeneration. Fields are read strictly in the order of the
    statements below.
    """

    class DictionaryTypes(Enum):
        # Values of the leading signed 32-bit type tag.
        xm_type_invalid = -1
        xm_type_long = 0
        xm_type_real = 1
        xm_type_string = 2
    def __init__(self, _io, _parent=None, _root=None):
        # _io is the KaitaiStream to read from; parsing happens immediately.
        self._io = _io
        self._parent = _parent
        self._root = _root if _root else self
        self._read()

    def _read(self):
        # Layout: s4le type tag, then HashInfo, then a type-dependent payload.
        self.dictionary_type = KaitaiStream.resolve_enum(ColumnDataDictionary.DictionaryTypes, self._io.read_s4le())
        self.hash_information = ColumnDataDictionary.HashInfo(self._io, self, self._root)
        _on = self.dictionary_type
        # Strings get StringData; long and real both get NumberData.
        # For any other tag value no branch runs and `data` is never set.
        if _on == ColumnDataDictionary.DictionaryTypes.xm_type_string:
            self.data = ColumnDataDictionary.StringData(self._io, self, self._root)
        elif _on == ColumnDataDictionary.DictionaryTypes.xm_type_long:
            self.data = ColumnDataDictionary.NumberData(self._io, self, self._root)
        elif _on == ColumnDataDictionary.DictionaryTypes.xm_type_real:
            self.data = ColumnDataDictionary.NumberData(self._io, self, self._root)

    class StringRecordHandle(KaitaiStruct):
        """Record handle made of two u4le values: an offset and a page id."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.bit_or_byte_offset = self._io.read_u4le()
            self.page_id = self._io.read_u4le()


    class StringData(KaitaiStruct):
        """Payload of a string dictionary: layout, pages, then record handles."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.page_layout_information = ColumnDataDictionary.PageLayout(self._io, self, self._root)
            self.dictionary_pages = []
            # One DictionaryPage per page announced in the layout header.
            for i in range(self.page_layout_information.store_page_count):
                self.dictionary_pages.append(ColumnDataDictionary.DictionaryPage(self._io, self, self._root))

            self.dictionary_record_handles_vector_info = ColumnDataDictionary.DictionaryRecordHandlesVector(self._io, self, self._root)


    class HashInfo(KaitaiStruct):
        """Six consecutive s4le values stored in `hash_elements`."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.hash_elements = []
            for i in range(6):
                self.hash_elements.append(self._io.read_s4le())



    class VectorOfVectors(KaitaiStruct):
        """Counted vector of numbers whose element type is derived at read time."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            # u8le element count, u4le element size, then the elements.
            self.element_count = self._io.read_u8le()
            self.element_size = self._io.read_u4le()
            self.values = []
            for i in range(self.element_count):
                # data_type_id is cached after its first evaluation.
                _on = self.data_type_id
                if _on == u"int32":
                    self.values.append(self._io.read_s4le())
                elif _on == u"int64":
                    self.values.append(self._io.read_s8le())
                elif _on == u"float64":
                    self.values.append(self._io.read_f8le())


        @property
        def is_int32(self):
            # True when element_size is 4; the result is memoised in _m_is_int32.
            if hasattr(self, '_m_is_int32'):
                return self._m_is_int32

            self._m_is_int32 = self.element_size == 4
            return getattr(self, '_m_is_int32', None)

        @property
        def is_int64(self):
            # True when element_size is 8 and the root dictionary type is long.
            if hasattr(self, '_m_is_int64'):
                return self._m_is_int64

            self._m_is_int64 = ((self.element_size == 8) and (self._root.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_long))
            return getattr(self, '_m_is_int64', None)

        @property
        def is_float64(self):
            # True when element_size is 8 and the root dictionary type is real.
            if hasattr(self, '_m_is_float64'):
                return self._m_is_float64

            self._m_is_float64 = ((self.element_size == 8) and (self._root.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_real))
            return getattr(self, '_m_is_float64', None)

        @property
        def data_type_id(self):
            # "int32" if is_int32, else "int64" if is_int64, else "float64".
            # Note "float64" is the fallback and does not consult is_float64.
            if hasattr(self, '_m_data_type_id'):
                return self._m_data_type_id

            self._m_data_type_id = (u"int32" if self.is_int32 else (u"int64" if self.is_int64 else u"float64"))
            return getattr(self, '_m_data_type_id', None)


    class CompressedStrings(KaitaiStruct):
        """String store variant read when a page's `page_compressed` is 1."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.store_total_bits = self._io.read_u4le()
            self.character_set_type_identifier = self._io.read_u4le()
            self.allocation_size = self._io.read_u8le()
            self.character_set_used = self._io.read_u1()
            self.ui_decode_bits = self._io.read_u4le()
            self.encode_array = []
            # Fixed-size table of 128 single bytes.
            for i in range(128):
                self.encode_array.append(self._io.read_u1())

            self.ui64_buffer_size = self._io.read_u8le()
            # Raw buffer of allocation_size bytes; it is not decoded here.
            self.compressed_string_buffer = self._io.read_bytes(self.allocation_size)


    class PageLayout(KaitaiStruct):
        """Layout header of a string dictionary (counts and a compression flag)."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.store_string_count = self._io.read_s8le()
            self.f_store_compressed = self._io.read_s1()
            self.store_longest_string = self._io.read_s8le()
            # Drives how many DictionaryPage records StringData reads.
            self.store_page_count = self._io.read_s8le()


    class DictionaryPage(KaitaiStruct):
        """One dictionary page: header fields and a marker-delimited string store."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.page_mask = self._io.read_u8le()
            self.page_contains_nulls = self._io.read_u1()
            self.page_start_index = self._io.read_u8le()
            self.page_string_count = self._io.read_u8le()
            self.page_compressed = self._io.read_u1()
            # The store must open with the 4-byte marker DD CC BB AA.
            self.string_store_begin_mark = self._io.read_bytes(4)
            if not self.string_store_begin_mark == b"\xDD\xCC\xBB\xAA":
                raise kaitaistruct.ValidationNotEqualError(b"\xDD\xCC\xBB\xAA", self.string_store_begin_mark, self._io, u"/types/dictionary_page/seq/5")
            _on = self.page_compressed
            # 0 -> uncompressed store, 1 -> compressed store. For any other
            # value no branch runs and `string_store` is never set.
            if _on == 0:
                self.string_store = ColumnDataDictionary.UncompressedStrings(self._io, self, self._root)
            elif _on == 1:
                self.string_store = ColumnDataDictionary.CompressedStrings(self._io, self, self._root)
            # The store must close with the 4-byte marker CD AB CD AB.
            self.string_store_end_mark = self._io.read_bytes(4)
            if not self.string_store_end_mark == b"\xCD\xAB\xCD\xAB":
                raise kaitaistruct.ValidationNotEqualError(b"\xCD\xAB\xCD\xAB", self.string_store_end_mark, self._io, u"/types/dictionary_page/seq/7")


    class OtherRecordHandle(KaitaiStruct):
        """Record handle holding a single u4le offset.

        Nothing else in this class instantiates it.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.bit_or_byte_offset = self._io.read_u4le()


    class UncompressedStrings(KaitaiStruct):
        """String store variant read when a page's `page_compressed` is 0."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.remaining_store_available = self._io.read_u8le()
            self.buffer_used_characters = self._io.read_u8le()
            self.allocation_size = self._io.read_u8le()
            # allocation_size bytes, decoded to str as UTF-16LE in one go.
            self.uncompressed_character_buffer = (self._io.read_bytes(self.allocation_size)).decode(u"UTF-16LE")


    class NumberData(KaitaiStruct):
        """Payload of a long or real dictionary: a single VectorOfVectors."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.vector_of_vectors_info = ColumnDataDictionary.VectorOfVectors(self._io, self, self._root)


    class DictionaryRecordHandlesVector(KaitaiStruct):
        """Counted vector of StringRecordHandle records."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.element_count = self._io.read_u8le()
            # element_size is kept as raw bytes and must equal 08 00 00 00.
            self.element_size = self._io.read_bytes(4)
            if not self.element_size == b"\x08\x00\x00\x00":
                raise kaitaistruct.ValidationNotEqualError(b"\x08\x00\x00\x00", self.element_size, self._io, u"/types/dictionary_record_handles_vector/seq/1")
            self.vector_of_record_handle_structures = []
            for i in range(self.element_count):
                self.vector_of_record_handle_structures.append(ColumnDataDictionary.StringRecordHandle(self._io, self, self._root))
243
+
244
+
245
+
246
+