pbixray 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. pbixray-0.1.1/LICENSE +21 -0
  2. pbixray-0.1.1/PKG-INFO +137 -0
  3. pbixray-0.1.1/README.md +98 -0
  4. pbixray-0.1.1/pbixray/__init__.py +1 -0
  5. pbixray-0.1.1/pbixray/abf/__init__.py +0 -0
  6. pbixray-0.1.1/pbixray/abf/backup_log.py +48 -0
  7. pbixray-0.1.1/pbixray/abf/backup_log_header.py +24 -0
  8. pbixray-0.1.1/pbixray/abf/data_model.py +8 -0
  9. pbixray-0.1.1/pbixray/abf/parser.py +65 -0
  10. pbixray-0.1.1/pbixray/abf/virtual_directory.py +20 -0
  11. pbixray-0.1.1/pbixray/column_data/__init__.py +0 -0
  12. pbixray-0.1.1/pbixray/column_data/dictionary.py +246 -0
  13. pbixray-0.1.1/pbixray/column_data/hidx.py +97 -0
  14. pbixray-0.1.1/pbixray/column_data/idf.py +55 -0
  15. pbixray-0.1.1/pbixray/column_data/idfmeta.py +184 -0
  16. pbixray-0.1.1/pbixray/core.py +71 -0
  17. pbixray-0.1.1/pbixray/huffman.py +79 -0
  18. pbixray-0.1.1/pbixray/meta/__init__.py +0 -0
  19. pbixray-0.1.1/pbixray/meta/metadata_handler.py +62 -0
  20. pbixray-0.1.1/pbixray/meta/metadata_query.py +151 -0
  21. pbixray-0.1.1/pbixray/meta/sqlite_handler.py +30 -0
  22. pbixray-0.1.1/pbixray/pbix_unpacker.py +192 -0
  23. pbixray-0.1.1/pbixray/utils.py +42 -0
  24. pbixray-0.1.1/pbixray/vertipaq_decoder.py +213 -0
  25. pbixray-0.1.1/pbixray/xpress8.py +159 -0
  26. pbixray-0.1.1/pbixray.egg-info/PKG-INFO +137 -0
  27. pbixray-0.1.1/pbixray.egg-info/SOURCES.txt +31 -0
  28. pbixray-0.1.1/pbixray.egg-info/dependency_links.txt +1 -0
  29. pbixray-0.1.1/pbixray.egg-info/requires.txt +4 -0
  30. pbixray-0.1.1/pbixray.egg-info/top_level.txt +1 -0
  31. pbixray-0.1.1/setup.cfg +4 -0
  32. pbixray-0.1.1/setup.py +38 -0
  33. pbixray-0.1.1/test/test_basic_operation.py +31 -0
pbixray-0.1.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Igor Cotruta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
pbixray-0.1.1/PKG-INFO ADDED
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.2
2
+ Name: pbixray
3
+ Version: 0.1.1
4
+ Summary: A Python library to parse and analyze PBIX files used with Microsoft Power BI.
5
+ Home-page: https://github.com/Hugoberry/pbixray
6
+ Author: Igor Cotruta
7
+ License: MIT
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Intended Audience :: Information Technology
12
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
14
+ Classifier: Topic :: Scientific/Engineering :: Visualization
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Operating System :: OS Independent
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: xpress9
27
+ Requires-Dist: kaitaistruct
28
+ Requires-Dist: pandas
29
+ Requires-Dist: apsw
30
+ Dynamic: author
31
+ Dynamic: classifier
32
+ Dynamic: description
33
+ Dynamic: description-content-type
34
+ Dynamic: home-page
35
+ Dynamic: license
36
+ Dynamic: requires-dist
37
+ Dynamic: requires-python
38
+ Dynamic: summary
39
+
40
+ # PBIXRay
41
+ [![Downloads](https://static.pepy.tech/badge/pbixray)](https://pepy.tech/project/pbixray)
42
+ ## Overview
43
+
44
+ PBIXRay is a Python library designed to parse and analyze PBIX files, which are used with Microsoft Power BI. This library provides a straightforward way to extract valuable information from PBIX files, including tables, metadata, Power Query code, and more.
45
+
46
+ This library is the Python implementation of the logic embedded in the DuckDB extension [duckdb-pbix-extension](https://github.com/Hugoberry/duckdb-pbix-extension/).
47
+
48
+ ## Installation
49
+
50
+ PBIXRay depends on the `xpress9`, `kaitaistruct`, `pandas`, and `apsw` Python packages. Installing `pbixray` with pip pulls them in automatically:
51
+
52
+ ```bash
53
+ pip install pbixray
54
+ ```
55
+
56
+ ## Getting Started
57
+ To start using PBIXRay, import the module and initialize it with the path to your PBIX file:
58
+ ```python
59
+ from pbixray import PBIXRay
60
+
61
+ model = PBIXRay('path/to/your/file.pbix')
62
+ ```
63
+
64
+ ## Features and Usage
65
+ ### Tables
66
+ To list all tables in the model:
67
+ ```python
68
+ tables = model.tables
69
+ print(tables)
70
+ ```
71
+ ### Metadata
72
+ To get metadata about the Power BI configuration used during model creation:
73
+ ```python
74
+ metadata = model.metadata
75
+ print(metadata)
76
+ ```
77
+ ### Power Query
78
+ To display all M/Power Query code used for data transformation, in a dataframe with `TableName` and `Expression` columns:
79
+ ```python
80
+ power_query = model.power_query
81
+ print(power_query)
82
+ ```
83
+ ### M Parameters
84
+ To display all M Parameters values in a dataframe with `ParameterName`, `Description`, `Expression` and `ModifiedTime` columns:
85
+ ```python
86
+ m_parameters = model.m_parameters
87
+ print(m_parameters)
88
+ ```
89
+ ### Model Size
90
+ To find out the model size in bytes:
91
+ ```python
92
+ size = model.size
93
+ print(f"Model size: {size} bytes")
94
+ ```
95
+ ### DAX Calculated Tables
96
+ To view DAX calculated tables in a dataframe with `TableName` and `Expression` columns:
97
+ ```python
98
+ dax_tables = model.dax_tables
99
+ print(dax_tables)
100
+ ```
101
+ ### DAX Measures
102
+ To access DAX measures in a dataframe with `TableName`, `Name`, `Expression`, `DisplayFolder`, and `Description` columns:
103
+ ```python
104
+ dax_measures = model.dax_measures
105
+ print(dax_measures)
106
+ ```
107
+ ### Calculated Columns
108
+ To access calculated column DAX expressions in a dataframe with `TableName`, `ColumnName`, and `Expression` columns:
109
+ ```python
110
+ dax_columns = model.dax_columns
111
+ print(dax_columns)
112
+ ```
113
+ ### Schema
114
+ To get details about the data model schema and column types in a dataframe with `TableName`, `ColumnName`, and `PandasDataType` columns:
115
+ ```python
116
+ schema = model.schema
117
+ print(schema)
118
+ ```
119
+ ### Relationships
120
+ To get the details about the data model relationships in a dataframe with `FromTableName`, `FromColumnName`, `ToTableName`, `ToColumnName`, `IsActive`, `Cardinality`, `CrossFilteringBehavior`, `FromKeyCount`, `ToKeyCount` and `RelyOnReferentialIntegrity` columns:
121
+ ```python
122
+ relationships = model.relationships
123
+ print(relationships)
124
+ ```
125
+ ### Get Table Contents
126
+ To retrieve the contents of a specified table:
127
+ ```python
128
+ table_name = 'YourTableName'
129
+ table_contents = model.get_table(table_name)
130
+ print(table_contents)
131
+ ```
132
+ ### Statistics
133
+ To get statistics about the model, including column cardinality and byte sizes of dictionary, hash index, and data components, in a dataframe with columns `TableName`, `ColumnName`, `Cardinality`, `Dictionary`, `HashIndex`, and `DataSize`:
134
+ ```python
135
+ statistics = model.statistics
136
+ print(statistics)
137
+ ```
@@ -0,0 +1,98 @@
1
+ # PBIXRay
2
+ [![Downloads](https://static.pepy.tech/badge/pbixray)](https://pepy.tech/project/pbixray)
3
+ ## Overview
4
+
5
+ PBIXRay is a Python library designed to parse and analyze PBIX files, which are used with Microsoft Power BI. This library provides a straightforward way to extract valuable information from PBIX files, including tables, metadata, Power Query code, and more.
6
+
7
+ This library is the Python implementation of the logic embedded in the DuckDB extension [duckdb-pbix-extension](https://github.com/Hugoberry/duckdb-pbix-extension/).
8
+
9
+ ## Installation
10
+
11
+ PBIXRay depends on the `xpress9`, `kaitaistruct`, `pandas`, and `apsw` Python packages. Installing `pbixray` with pip pulls them in automatically:
12
+
13
+ ```bash
14
+ pip install pbixray
15
+ ```
16
+
17
+ ## Getting Started
18
+ To start using PBIXRay, import the module and initialize it with the path to your PBIX file:
19
+ ```python
20
+ from pbixray import PBIXRay
21
+
22
+ model = PBIXRay('path/to/your/file.pbix')
23
+ ```
24
+
25
+ ## Features and Usage
26
+ ### Tables
27
+ To list all tables in the model:
28
+ ```python
29
+ tables = model.tables
30
+ print(tables)
31
+ ```
32
+ ### Metadata
33
+ To get metadata about the Power BI configuration used during model creation:
34
+ ```python
35
+ metadata = model.metadata
36
+ print(metadata)
37
+ ```
38
+ ### Power Query
39
+ To display all M/Power Query code used for data transformation, in a dataframe with `TableName` and `Expression` columns:
40
+ ```python
41
+ power_query = model.power_query
42
+ print(power_query)
43
+ ```
44
+ ### M Parameters
45
+ To display all M Parameters values in a dataframe with `ParameterName`, `Description`, `Expression` and `ModifiedTime` columns:
46
+ ```python
47
+ m_parameters = model.m_parameters
48
+ print(m_parameters)
49
+ ```
50
+ ### Model Size
51
+ To find out the model size in bytes:
52
+ ```python
53
+ size = model.size
54
+ print(f"Model size: {size} bytes")
55
+ ```
56
+ ### DAX Calculated Tables
57
+ To view DAX calculated tables in a dataframe with `TableName` and `Expression` columns:
58
+ ```python
59
+ dax_tables = model.dax_tables
60
+ print(dax_tables)
61
+ ```
62
+ ### DAX Measures
63
+ To access DAX measures in a dataframe with `TableName`, `Name`, `Expression`, `DisplayFolder`, and `Description` columns:
64
+ ```python
65
+ dax_measures = model.dax_measures
66
+ print(dax_measures)
67
+ ```
68
+ ### Calculated Columns
69
+ To access calculated column DAX expressions in a dataframe with `TableName`, `ColumnName`, and `Expression` columns:
70
+ ```python
71
+ dax_columns = model.dax_columns
72
+ print(dax_columns)
73
+ ```
74
+ ### Schema
75
+ To get details about the data model schema and column types in a dataframe with `TableName`, `ColumnName`, and `PandasDataType` columns:
76
+ ```python
77
+ schema = model.schema
78
+ print(schema)
79
+ ```
80
+ ### Relationships
81
+ To get the details about the data model relationships in a dataframe with `FromTableName`, `FromColumnName`, `ToTableName`, `ToColumnName`, `IsActive`, `Cardinality`, `CrossFilteringBehavior`, `FromKeyCount`, `ToKeyCount` and `RelyOnReferentialIntegrity` columns:
82
+ ```python
83
+ relationships = model.relationships
84
+ print(relationships)
85
+ ```
86
+ ### Get Table Contents
87
+ To retrieve the contents of a specified table:
88
+ ```python
89
+ table_name = 'YourTableName'
90
+ table_contents = model.get_table(table_name)
91
+ print(table_contents)
92
+ ```
93
+ ### Statistics
94
+ To get statistics about the model, including column cardinality and byte sizes of dictionary, hash index, and data components, in a dataframe with columns `TableName`, `ColumnName`, `Cardinality`, `Dictionary`, `HashIndex`, and `DataSize`:
95
+ ```python
96
+ statistics = model.statistics
97
+ print(statistics)
98
+ ```
@@ -0,0 +1 @@
1
+ from .core import PBIXRay
File without changes
@@ -0,0 +1,48 @@
1
+ import xml.etree.ElementTree as ET
2
+
3
class BackupLog:
    """Parsed view of the backup-log XML document.

    Child elements of the XML root are exposed as attributes named after
    their tags. Flag elements become booleans that are True only when the
    element text is exactly "true"; a missing flag element yields False.
    """

    def __init__(self, xml_string, error_code):
        """Decode and parse the backup log.

        Args:
            xml_string: UTF-16 encoded bytes holding the XML document.
            error_code: When truthy, the last four bytes of ``xml_string``
                are dropped before decoding.
                NOTE(review): presumably a trailing error-code/checksum
                field - confirm against the ABF layout.

        Raises:
            xml.etree.ElementTree.ParseError: If the decoded text is not
                well-formed XML.
        """
        payload = xml_string[:-4] if error_code else xml_string
        document = ET.fromstring(payload.decode('utf-16'))
        text_of = document.findtext

        def is_true(tag):
            # findtext returns None for a missing element, which compares
            # unequal to "true" and therefore reads as False.
            return text_of(tag) == "true"

        self.BackupRestoreSyncVersion = text_of("BackupRestoreSyncVersion")
        self.ServerRoot = text_of("ServerRoot")
        self.SvrEncryptPwdFlag = is_true("SvrEncryptPwdFlag")
        self.ServerEnableBinaryXML = is_true("ServerEnableBinaryXML")
        self.ServerEnableCompression = is_true("ServerEnableCompression")
        self.CompressionFlag = is_true("CompressionFlag")
        self.EncryptionFlag = is_true("EncryptionFlag")
        self.ObjectName = text_of("ObjectName")
        self.ObjectId = text_of("ObjectId")
        self.Write = text_of("Write")
        self.OlapInfo = is_true("OlapInfo")

        # Repeated child collections.
        self.Collations = [
            node.text for node in document.findall("Collations/Collation")
        ]
        self.Languages = [
            int(node.text) for node in document.findall("Languages/Language")
        ]
        self.FileGroups = [
            FileGroup.from_xml(node)
            for node in document.findall("FileGroups/FileGroup")
        ]
+ self.FileGroups = [FileGroup.from_xml(filegroup) for filegroup in root.findall("FileGroups/FileGroup")]
24
+
25
class FileGroup:
    """One ``FileGroup`` element of the backup log and its file list."""

    @classmethod
    def from_xml(cls, element):
        """Build a FileGroup from a parsed ``FileGroup`` XML element.

        Args:
            element: An ElementTree element whose children carry the group
                fields.

        Returns:
            A new instance with one attribute per field read below.

        Raises:
            TypeError: If ``Class``, ``ObjectVersion`` or ``PersistLocation``
                is missing (``int(None)``).
            ValueError: If one of those elements is not an integer literal.
        """
        read = element.findtext
        group = cls()
        group.Class = int(read("Class"))
        group.ID = read("ID")
        group.Name = read("Name")
        group.ObjectVersion = int(read("ObjectVersion"))
        group.PersistLocation = int(read("PersistLocation"))
        group.PersistLocationPath = read("PersistLocationPath")
        group.StorageLocationPath = read("StorageLocationPath")
        group.ObjectID = read("ObjectID")
        group.FileList = [
            BackupFile.from_xml(node)
            for node in element.findall("FileList/BackupFile")
        ]
        return group
39
+
40
class BackupFile:
    """One ``BackupFile`` entry listed inside a backup-log file group."""

    @classmethod
    def from_xml(cls, element):
        """Build a BackupFile from a parsed ``BackupFile`` XML element.

        ``Path`` and ``StoragePath`` are kept as text (None when absent);
        ``LastWriteTime`` and ``Size`` are converted to int, so a missing
        element raises TypeError and a non-numeric one raises ValueError.
        """
        read = element.findtext
        entry = cls()
        entry.Path = read("Path")
        entry.StoragePath = read("StoragePath")
        entry.LastWriteTime = int(read("LastWriteTime"))
        entry.Size = int(read("Size"))
        return entry
@@ -0,0 +1,24 @@
1
+ import xml.etree.ElementTree as ET
2
+
3
class BackupLogHeader:
    """Parsed view of the backup-log header XML document.

    Numeric elements are converted to int; flag elements become booleans
    that are True only when the element text is exactly "true".
    """

    def __init__(self, xml_string):
        """Parse the header.

        Args:
            xml_string: The header as UTF-16 encoded bytes, or as already
                decoded text. Trailing NUL characters are stripped in both
                cases.

        Raises:
            xml.etree.ElementTree.ParseError: If the text is not well-formed
                XML.
            TypeError: If a numeric element is missing (``int(None)``).
            ValueError: If a numeric element is not an integer literal.
        """
        # Accept raw bytes as well as decoded text so that both the direct
        # constructor and from_xml_string work on either form.
        if not isinstance(xml_string, str):
            xml_string = bytes(xml_string).decode('utf-16')

        # header is always one page (4096 bytes) in size and padded with
        # zeros between the last header element and the end of the page
        trimmed_xml_string = xml_string.rstrip('\x00')
        root = ET.fromstring(trimmed_xml_string)

        self.BackupRestoreSyncVersion = int(root.findtext("BackupRestoreSyncVersion"))
        self.Fault = root.findtext("Fault") == "true"
        self.faultcode = int(root.findtext("faultcode"))
        self.ErrorCode = root.findtext("ErrorCode") == "true"
        self.EncryptionFlag = root.findtext("EncryptionFlag") == "true"
        self.EncryptionKey = int(root.findtext("EncryptionKey"))
        self.ApplyCompression = root.findtext("ApplyCompression") == "true"
        self.m_cbOffsetHeader = int(root.findtext("m_cbOffsetHeader"))
        self.DataSize = int(root.findtext("DataSize"))
        self.Files = int(root.findtext("Files"))
        self.ObjectID = root.findtext("ObjectID")
        self.m_cbOffsetData = int(root.findtext("m_cbOffsetData"))

    @classmethod
    def from_xml_string(cls, xml_string):
        """Alternate constructor; accepts the same input as ``__init__``.

        Previously this decoded the bytes and then handed the resulting str
        to ``__init__``, which tried to decode it a second time and raised
        AttributeError. Decoding now happens in exactly one place.
        """
        return cls(xml_string)
@@ -0,0 +1,8 @@
1
+ from dataclasses import dataclass
2
+
3
@dataclass
class DataModel:
    """Container passed to the ABF parser and filled in by it.

    The parser reads ``decompressed_data`` as its input buffer and writes
    ``file_log``, ``error_code`` and ``apply_compression`` back onto the
    same instance.
    """

    # List of matched file records; AbfParser stores a list of dicts here.
    file_log: list
    # Raw bytes the parser slices the header, directory and log from.
    decompressed_data: bytes
    # Copied from the backup-log header's ErrorCode flag.
    error_code: bool = False
    # Copied from the backup-log header's ApplyCompression flag.
    apply_compression: bool = False
@@ -0,0 +1,65 @@
1
+ import xml.etree.ElementTree as ET
2
+ from .backup_log import BackupLog
3
+ from .backup_log_header import BackupLogHeader
4
+ from .virtual_directory import VirtualDirectory
5
+ from .data_model import DataModel
6
+
7
class AbfParser:
    """Parse the decompressed ABF buffer held by a DataModel.

    Construction runs the whole pipeline: backup-log header, virtual
    directory, backup log, and finally the matching of log entries to
    virtual-directory entries. The resulting file list is stored both on
    ``data_model.file_log`` and on ``self.file_log``; the header's ErrorCode
    and ApplyCompression flags are copied onto the data model.
    """

    def __init__(self, data_model: "DataModel"):
        """Store the model and immediately parse its ``decompressed_data``.

        Args:
            data_model: Object exposing ``decompressed_data`` (bytes); it is
                mutated with the parse results.
        """
        self.data_model = data_model
        self.__buffer = data_model.decompressed_data
        self.__backup_log_header = None
        self.__virtual_directory = None
        self.__backup_log = None
        self.file_log = None  # public attribute, set once matching completes

        # Trigger the parsing process upon initialization
        self.__parse_all()

    def __parse_all(self):
        """Run the parsing stages in dependency order."""
        self.__parse_backup_log_header()
        self.__parse_virtual_directory()
        self.__parse_backup_log()
        self.__match_logs_and_get_attributes()

    def __parse_backup_log_header(self):
        """Parse the header found between byte 72 and the end of page one."""
        offset = 72  # STREAM_STORAGE_SIGNATURE_)!@#$%^&*(
        page = 0x1000  # header is always one page (4096 bytes) in size
        self.__backup_log_header = BackupLogHeader(self.__buffer[offset:page])

    def __parse_virtual_directory(self):
        """Parse the virtual directory located via the header's offset/size."""
        offset = int(self.__backup_log_header.m_cbOffsetHeader)
        size = int(self.__backup_log_header.DataSize)
        self.__virtual_directory = VirtualDirectory(self.__buffer[offset:offset + size])

    def __parse_backup_log(self):
        """Parse the backup log and copy header flags onto the data model.

        The last entry of the virtual directory is taken to be the log.
        """
        log = self.__virtual_directory.BackupFiles[-1]
        offset = int(log.m_cbOffsetHeader)
        size = int(log.Size)
        header = self.__backup_log_header
        self.data_model.error_code = header.ErrorCode
        self.data_model.apply_compression = header.ApplyCompression
        self.__backup_log = BackupLog(self.__buffer[offset:offset + size], header.ErrorCode)

    def __match_logs_and_get_attributes(self):
        """Join backup-log files with virtual-directory entries.

        A log file matches when its ``StoragePath`` equals the ``Path`` of a
        virtual-directory entry; unmatched log files are skipped. The
        persist root (``PersistLocationPath`` of the second file group plus
        a backslash) is stripped from the front of each logged path.

        Raises:
            IndexError: If the backup log has fewer than two file groups.
        """
        persist_root = self.__backup_log.FileGroups[1].PersistLocationPath + '\\'
        stored_by_path = {
            entry.Path: entry for entry in self.__virtual_directory.BackupFiles
        }
        matched_data = []

        for file_group in self.__backup_log.FileGroups:
            for backup_file in file_group.FileList:
                stored = stored_by_path.get(backup_file.StoragePath)
                if stored is None:
                    continue

                relative_path = backup_file.Path
                if relative_path.startswith(persist_root):
                    # Drop only the leading occurrence of the root.
                    relative_path = relative_path[len(persist_root):]

                matched_data.append({
                    'Path': relative_path,
                    'FileName': relative_path.split('\\')[-1],
                    'StoragePath': backup_file.StoragePath,
                    'Size': stored.Size,
                    'SizeFromLog': backup_file.Size,
                    'm_cbOffsetHeader': stored.m_cbOffsetHeader,
                })

        self.data_model.file_log = matched_data
        # Keep the advertised public attribute in sync with the data model;
        # it was previously left as None.
        self.file_log = matched_data
@@ -0,0 +1,20 @@
1
+ import xml.etree.ElementTree as ET
2
+
3
class VirtualDirectoryBackupFile:
    """One ``BackupFile`` entry of the virtual directory XML."""

    def __init__(self, element):
        """Read the entry's fields from a parsed XML element.

        ``Path`` is kept as text, ``Delete`` is True only when its text is
        exactly "true", and every other field is converted to int (so a
        missing element raises TypeError).
        """
        read = element.findtext

        def as_int(tag):
            return int(read(tag))

        self.Path = read("Path")
        self.Size = as_int("Size")
        self.m_cbOffsetHeader = as_int("m_cbOffsetHeader")
        self.Delete = read("Delete") == "true"
        self.CreatedTimestamp = as_int("CreatedTimestamp")
        self.Access = as_int("Access")
        self.LastWriteTime = as_int("LastWriteTime")
12
+
13
class VirtualDirectory:
    """Parsed virtual directory: the list of ``BackupFile`` entries."""

    def __init__(self, xml_string):
        """Parse the directory XML.

        Args:
            xml_string: XML document (bytes or str) whose root has
                ``BackupFile`` children; each becomes one entry of
                ``BackupFiles``, in document order.
        """
        document = ET.fromstring(xml_string)
        entries = []
        for node in document.findall("BackupFile"):
            entries.append(VirtualDirectoryBackupFile(node))
        self.BackupFiles = entries

    @classmethod
    def from_xml_string(cls, xml_string):
        """Alternate constructor equivalent to calling the class directly."""
        return cls(xml_string)
File without changes
@@ -0,0 +1,246 @@
1
+ # This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
2
+
3
+ import kaitaistruct
4
+ from kaitaistruct import KaitaiStruct, KaitaiStream, BytesIO
5
+ from enum import Enum
6
+
7
+
8
# Fail at import time when the installed kaitaistruct runtime reports an
# API_VERSION older than 0.9; a runtime without the attribute is treated as
# (0, 9) and therefore accepted.
if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 9):
    raise Exception("Incompatible Kaitai Struct Python API: 0.9 or later is required, but you have %s" % (kaitaistruct.__version__))
10
+
11
class ColumnDataDictionary(KaitaiStruct):
    """Kaitai Struct reader for a column dictionary stream.

    Read order: a signed 32-bit dictionary type, six signed 32-bit hash
    elements, then a payload selected by the dictionary type (string pages
    for strings, a numeric vector for long/real). For any other type no
    ``data`` attribute is set.

    NOTE: the original of this class is generated from a .ksy specification;
    lasting changes belong in that source.
    """

    class DictionaryTypes(Enum):
        xm_type_invalid = -1
        xm_type_long = 0
        xm_type_real = 1
        xm_type_string = 2

    def __init__(self, _io, _parent=None, _root=None):
        self._io = _io
        self._parent = _parent
        self._root = _root or self
        self._read()

    def _read(self):
        stream = self._io
        kinds = ColumnDataDictionary.DictionaryTypes
        self.dictionary_type = KaitaiStream.resolve_enum(kinds, stream.read_s4le())
        self.hash_information = ColumnDataDictionary.HashInfo(stream, self, self._root)

        payload_types = {
            kinds.xm_type_string: ColumnDataDictionary.StringData,
            kinds.xm_type_long: ColumnDataDictionary.NumberData,
            kinds.xm_type_real: ColumnDataDictionary.NumberData,
        }
        payload_type = payload_types.get(self.dictionary_type)
        # Unrecognised or invalid types deliberately leave `data` unset.
        if payload_type is not None:
            self.data = payload_type(stream, self, self._root)

    class StringRecordHandle(KaitaiStruct):
        """Two unsigned 32-bit values: an offset and a page id."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            self.bit_or_byte_offset = self._io.read_u4le()
            self.page_id = self._io.read_u4le()

    class StringData(KaitaiStruct):
        """Page layout, ``store_page_count`` pages, then the handle vector."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            stream, root = self._io, self._root
            self.page_layout_information = ColumnDataDictionary.PageLayout(stream, self, root)
            self.dictionary_pages = [
                ColumnDataDictionary.DictionaryPage(stream, self, root)
                for _ in range(self.page_layout_information.store_page_count)
            ]
            self.dictionary_record_handles_vector_info = (
                ColumnDataDictionary.DictionaryRecordHandlesVector(stream, self, root)
            )

    class HashInfo(KaitaiStruct):
        """Six consecutive signed 32-bit little-endian values."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            self.hash_elements = [self._io.read_s4le() for _ in range(6)]

    class VectorOfVectors(KaitaiStruct):
        """Counted vector of int32, int64 or float64 values.

        The element type follows from ``element_size`` and the root's
        dictionary type; see ``data_type_id``.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            stream = self._io
            self.element_count = stream.read_u8le()
            self.element_size = stream.read_u4le()
            readers = {
                u"int32": stream.read_s4le,
                u"int64": stream.read_s8le,
                u"float64": stream.read_f8le,
            }
            self.values = []
            for _ in range(self.element_count):
                # data_type_id is resolved lazily, on the first element only.
                self.values.append(readers[self.data_type_id]())

        @property
        def is_int32(self):
            """True when elements are four bytes wide (computed once)."""
            try:
                return self._m_is_int32
            except AttributeError:
                self._m_is_int32 = self.element_size == 4
                return self._m_is_int32

        @property
        def is_int64(self):
            """True for eight-byte elements of a long dictionary (cached)."""
            try:
                return self._m_is_int64
            except AttributeError:
                self._m_is_int64 = ((self.element_size == 8) and (self._root.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_long))
                return self._m_is_int64

        @property
        def is_float64(self):
            """True for eight-byte elements of a real dictionary (cached)."""
            try:
                return self._m_is_float64
            except AttributeError:
                self._m_is_float64 = ((self.element_size == 8) and (self._root.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_real))
                return self._m_is_float64

        @property
        def data_type_id(self):
            """"int32", "int64" or, as the fallback, "float64" (cached)."""
            try:
                return self._m_data_type_id
            except AttributeError:
                if self.is_int32:
                    type_id = u"int32"
                elif self.is_int64:
                    type_id = u"int64"
                else:
                    type_id = u"float64"
                self._m_data_type_id = type_id
                return self._m_data_type_id

    class CompressedStrings(KaitaiStruct):
        """Header fields, a 128-byte encode array and the compressed buffer."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            stream = self._io
            self.store_total_bits = stream.read_u4le()
            self.character_set_type_identifier = stream.read_u4le()
            self.allocation_size = stream.read_u8le()
            self.character_set_used = stream.read_u1()
            self.ui_decode_bits = stream.read_u4le()
            self.encode_array = [stream.read_u1() for _ in range(128)]
            self.ui64_buffer_size = stream.read_u8le()
            self.compressed_string_buffer = stream.read_bytes(self.allocation_size)

    class PageLayout(KaitaiStruct):
        """String count, compressed flag, longest string and page count."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            stream = self._io
            self.store_string_count = stream.read_s8le()
            self.f_store_compressed = stream.read_s1()
            self.store_longest_string = stream.read_s8le()
            self.store_page_count = stream.read_s8le()

    class DictionaryPage(KaitaiStruct):
        """One dictionary page: header, begin mark, string store, end mark.

        Raises kaitaistruct.ValidationNotEqualError when either mark does
        not match its expected byte sequence.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            stream = self._io
            self.page_mask = stream.read_u8le()
            self.page_contains_nulls = stream.read_u1()
            self.page_start_index = stream.read_u8le()
            self.page_string_count = stream.read_u8le()
            self.page_compressed = stream.read_u1()

            self.string_store_begin_mark = stream.read_bytes(4)
            if self.string_store_begin_mark != b"\xDD\xCC\xBB\xAA":
                raise kaitaistruct.ValidationNotEqualError(b"\xDD\xCC\xBB\xAA", self.string_store_begin_mark, self._io, u"/types/dictionary_page/seq/5")

            # 0 -> uncompressed store, 1 -> compressed store; any other
            # value leaves `string_store` unset.
            store_types = {
                0: ColumnDataDictionary.UncompressedStrings,
                1: ColumnDataDictionary.CompressedStrings,
            }
            store_type = store_types.get(self.page_compressed)
            if store_type is not None:
                self.string_store = store_type(stream, self, self._root)

            self.string_store_end_mark = stream.read_bytes(4)
            if self.string_store_end_mark != b"\xCD\xAB\xCD\xAB":
                raise kaitaistruct.ValidationNotEqualError(b"\xCD\xAB\xCD\xAB", self.string_store_end_mark, self._io, u"/types/dictionary_page/seq/7")

    class OtherRecordHandle(KaitaiStruct):
        """A single unsigned 32-bit offset."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            self.bit_or_byte_offset = self._io.read_u4le()

    class UncompressedStrings(KaitaiStruct):
        """Three 64-bit counters followed by a UTF-16LE character buffer."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            stream = self._io
            self.remaining_store_available = stream.read_u8le()
            self.buffer_used_characters = stream.read_u8le()
            self.allocation_size = stream.read_u8le()
            raw_characters = stream.read_bytes(self.allocation_size)
            self.uncompressed_character_buffer = raw_characters.decode(u"UTF-16LE")

    class NumberData(KaitaiStruct):
        """Numeric dictionary payload: a single VectorOfVectors."""

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            self.vector_of_vectors_info = ColumnDataDictionary.VectorOfVectors(self._io, self, self._root)

    class DictionaryRecordHandlesVector(KaitaiStruct):
        """Counted vector of StringRecordHandle structures.

        The four-byte element size must equal 08 00 00 00, otherwise
        kaitaistruct.ValidationNotEqualError is raised.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root or self
            self._read()

        def _read(self):
            stream = self._io
            self.element_count = stream.read_u8le()
            self.element_size = stream.read_bytes(4)
            if self.element_size != b"\x08\x00\x00\x00":
                raise kaitaistruct.ValidationNotEqualError(b"\x08\x00\x00\x00", self.element_size, self._io, u"/types/dictionary_record_handles_vector/seq/1")
            self.vector_of_record_handle_structures = [
                ColumnDataDictionary.StringRecordHandle(stream, self, self._root)
                for _ in range(self.element_count)
            ]
243
+
244
+
245
+
246
+