PyPI - pbixray - Versions diffs - 0.1.1__tar.gz - Mend

pbixray 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

pbixray-0.1.1/LICENSE +21 -0
pbixray-0.1.1/PKG-INFO +137 -0
pbixray-0.1.1/README.md +98 -0
pbixray-0.1.1/pbixray/__init__.py +1 -0
pbixray-0.1.1/pbixray/abf/__init__.py +0 -0
pbixray-0.1.1/pbixray/abf/backup_log.py +48 -0
pbixray-0.1.1/pbixray/abf/backup_log_header.py +24 -0
pbixray-0.1.1/pbixray/abf/data_model.py +8 -0
pbixray-0.1.1/pbixray/abf/parser.py +65 -0
pbixray-0.1.1/pbixray/abf/virtual_directory.py +20 -0
pbixray-0.1.1/pbixray/column_data/__init__.py +0 -0
pbixray-0.1.1/pbixray/column_data/dictionary.py +246 -0
pbixray-0.1.1/pbixray/column_data/hidx.py +97 -0
pbixray-0.1.1/pbixray/column_data/idf.py +55 -0
pbixray-0.1.1/pbixray/column_data/idfmeta.py +184 -0
pbixray-0.1.1/pbixray/core.py +71 -0
pbixray-0.1.1/pbixray/huffman.py +79 -0
pbixray-0.1.1/pbixray/meta/__init__.py +0 -0
pbixray-0.1.1/pbixray/meta/metadata_handler.py +62 -0
pbixray-0.1.1/pbixray/meta/metadata_query.py +151 -0
pbixray-0.1.1/pbixray/meta/sqlite_handler.py +30 -0
pbixray-0.1.1/pbixray/pbix_unpacker.py +192 -0
pbixray-0.1.1/pbixray/utils.py +42 -0
pbixray-0.1.1/pbixray/vertipaq_decoder.py +213 -0
pbixray-0.1.1/pbixray/xpress8.py +159 -0
pbixray-0.1.1/pbixray.egg-info/PKG-INFO +137 -0
pbixray-0.1.1/pbixray.egg-info/SOURCES.txt +31 -0
pbixray-0.1.1/pbixray.egg-info/dependency_links.txt +1 -0
pbixray-0.1.1/pbixray.egg-info/requires.txt +4 -0
pbixray-0.1.1/pbixray.egg-info/top_level.txt +1 -0
pbixray-0.1.1/setup.cfg +4 -0
pbixray-0.1.1/setup.py +38 -0
pbixray-0.1.1/test/test_basic_operation.py +31 -0

pbixray-0.1.1/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 Igor Cotruta
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

pbixray-0.1.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,137 @@
+Metadata-Version: 2.2
+Name: pbixray
+Version: 0.1.1
+Summary: A Python library to parse and analyze PBIX files used with Microsoft Power BI.
+Home-page: https://github.com/Hugoberry/pbixray
+Author: Igor Cotruta
+License: MIT
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: Intended Audience :: Information Technology
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: Topic :: Scientific/Engineering :: Visualization
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: xpress9
+Requires-Dist: kaitaistruct
+Requires-Dist: pandas
+Requires-Dist: apsw
+Dynamic: author
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+# PBIXRay
+[![Downloads](https://static.pepy.tech/badge/pbixray)](https://pepy.tech/project/pbixray)
+## Overview
+PBIXRay is a Python library designed to parse and analyze PBIX files, which are used with Microsoft Power BI. This library provides a straightforward way to extract valuable information from PBIX files, including tables, metadata, Power Query code, and more.
+This library is the Python implementation of the logic embedded in the DuckDB extension [duckdb-pbix-extension](https://github.com/Hugoberry/duckdb-pbix-extension/).
+## Installation
+Install PBIXRay with pip. Its dependencies (`xpress9`, `kaitaistruct`, `pandas`, and `apsw`) are declared in the package metadata and are installed automatically:
+```bash
+pip install pbixray
+```
+## Getting Started
+To start using PBIXRay, import the module and initialize it with the path to your PBIX file:
+```python
+from pbixray import PBIXRay
+model = PBIXRay('path/to/your/file.pbix')
+```
+## Features and Usage
+### Tables
+To list all tables in the model:
+```python
+tables = model.tables
+print(tables)
+```
+### Metadata
+To get metadata about the Power BI configuration used during model creation:
+```python
+metadata = model.metadata
+print(metadata)
+```
+### Power Query
+To display all M/Power Query code used for data transformation, in a dataframe with `TableName` and `Expression` columns:
+```python
+power_query = model.power_query
+print(power_query)
+```
+### M Parameters
+To display all M Parameters values in a dataframe with `ParameterName`, `Description`, `Expression` and `ModifiedTime` columns:
+```python
+m_parameters = model.m_parameters
+print(m_parameters)
+```
+### Model Size
+To find out the model size in bytes:
+```python
+size = model.size
+print(f"Model size: {size} bytes")
+```
+### DAX Calculated Tables
+To view DAX calculated tables in a dataframe with `TableName` and `Expression` columns:
+```python
+dax_tables = model.dax_tables
+print(dax_tables)
+```
+### DAX Measures
+To access DAX measures in a dataframe with `TableName`, `Name`, `Expression`, `DisplayFolder`, and `Description` columns:
+```python
+dax_measures = model.dax_measures
+print(dax_measures)
+```
+### Calculated Columns
+To access calculated column DAX expressions in a dataframe with `TableName`,`ColumnName` and `Expression` columns:
+```python
+dax_columns = model.dax_columns
+print(dax_columns)
+```
+### Schema
+To get details about the data model schema and column types in a dataframe with `TableName`, `ColumnName`, and `PandasDataType` columns:
+```python
+schema = model.schema
+print(schema)
+```
+### Relationships
+To get the details about the data model relationships in a dataframe with `FromTableName`, `FromColumnName`, `ToTableName`, `ToColumnName`, `IsActive`, `Cardinality`, `CrossFilteringBehavior`, `FromKeyCount`, `ToKeyCount` and `RelyOnReferentialIntegrity` columns:
+```python
+relationships = model.relationships
+print(relationships)
+```
+### Get Table Contents
+To retrieve the contents of a specified table:
+```python
+table_name = 'YourTableName'
+table_contents = model.get_table(table_name)
+print(table_contents)
+```
+### Statistics
+To get statistics about the model, including column cardinality and byte sizes of dictionary, hash index, and data components, in a dataframe with columns `TableName`, `ColumnName`, `Cardinality`, `Dictionary`, `HashIndex`, and `DataSize`:
+```python
+statistics = model.statistics
+print(statistics)
+```

pbixray-0.1.1/README.md ADDED Viewed

@@ -0,0 +1,98 @@
+# PBIXRay
+[![Downloads](https://static.pepy.tech/badge/pbixray)](https://pepy.tech/project/pbixray)
+## Overview
+PBIXRay is a Python library designed to parse and analyze PBIX files, which are used with Microsoft Power BI. This library provides a straightforward way to extract valuable information from PBIX files, including tables, metadata, Power Query code, and more.
+This library is the Python implementation of the logic embedded in the DuckDB extension [duckdb-pbix-extension](https://github.com/Hugoberry/duckdb-pbix-extension/).
+## Installation
+Install PBIXRay with pip. Its dependencies (`xpress9`, `kaitaistruct`, `pandas`, and `apsw`) are declared in the package metadata and are installed automatically:
+```bash
+pip install pbixray
+```
+## Getting Started
+To start using PBIXRay, import the module and initialize it with the path to your PBIX file:
+```python
+from pbixray import PBIXRay
+model = PBIXRay('path/to/your/file.pbix')
+```
+## Features and Usage
+### Tables
+To list all tables in the model:
+```python
+tables = model.tables
+print(tables)
+```
+### Metadata
+To get metadata about the Power BI configuration used during model creation:
+```python
+metadata = model.metadata
+print(metadata)
+```
+### Power Query
+To display all M/Power Query code used for data transformation, in a dataframe with `TableName` and `Expression` columns:
+```python
+power_query = model.power_query
+print(power_query)
+```
+### M Parameters
+To display all M Parameters values in a dataframe with `ParameterName`, `Description`, `Expression` and `ModifiedTime` columns:
+```python
+m_parameters = model.m_parameters
+print(m_parameters)
+```
+### Model Size
+To find out the model size in bytes:
+```python
+size = model.size
+print(f"Model size: {size} bytes")
+```
+### DAX Calculated Tables
+To view DAX calculated tables in a dataframe with `TableName` and `Expression` columns:
+```python
+dax_tables = model.dax_tables
+print(dax_tables)
+```
+### DAX Measures
+To access DAX measures in a dataframe with `TableName`, `Name`, `Expression`, `DisplayFolder`, and `Description` columns:
+```python
+dax_measures = model.dax_measures
+print(dax_measures)
+```
+### Calculated Columns
+To access calculated column DAX expressions in a dataframe with `TableName`,`ColumnName` and `Expression` columns:
+```python
+dax_columns = model.dax_columns
+print(dax_columns)
+```
+### Schema
+To get details about the data model schema and column types in a dataframe with `TableName`, `ColumnName`, and `PandasDataType` columns:
+```python
+schema = model.schema
+print(schema)
+```
+### Relationships
+To get the details about the data model relationships in a dataframe with `FromTableName`, `FromColumnName`, `ToTableName`, `ToColumnName`, `IsActive`, `Cardinality`, `CrossFilteringBehavior`, `FromKeyCount`, `ToKeyCount` and `RelyOnReferentialIntegrity` columns:
+```python
+relationships = model.relationships
+print(relationships)
+```
+### Get Table Contents
+To retrieve the contents of a specified table:
+```python
+table_name = 'YourTableName'
+table_contents = model.get_table(table_name)
+print(table_contents)
+```
+### Statistics
+To get statistics about the model, including column cardinality and byte sizes of dictionary, hash index, and data components, in a dataframe with columns `TableName`, `ColumnName`, `Cardinality`, `Dictionary`, `HashIndex`, and `DataSize`:
+```python
+statistics = model.statistics
+print(statistics)
+```

pbixray-0.1.1/pbixray/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .core import PBIXRay

pbixray-0.1.1/pbixray/abf/__init__.py ADDED Viewed

File without changes

pbixray-0.1.1/pbixray/abf/backup_log.py ADDED Viewed

@@ -0,0 +1,48 @@
+import xml.etree.ElementTree as ET
+class BackupLog:
+    def __init__(self, xml_string, error_code):
+        #if error_code trim last 4 bytes
+        if error_code:
+            xml_string = xml_string[:-4]
+        trimmed_xml_string = xml_string.decode('utf-16')
+        root = ET.fromstring(trimmed_xml_string)
+        self.BackupRestoreSyncVersion = root.findtext("BackupRestoreSyncVersion")
+        self.ServerRoot = root.findtext("ServerRoot")
+        self.SvrEncryptPwdFlag = root.findtext("SvrEncryptPwdFlag") == "true"
+        self.ServerEnableBinaryXML = root.findtext("ServerEnableBinaryXML") == "true"
+        self.ServerEnableCompression = root.findtext("ServerEnableCompression") == "true"
+        self.CompressionFlag = root.findtext("CompressionFlag") == "true"
+        self.EncryptionFlag = root.findtext("EncryptionFlag") == "true"
+        self.ObjectName = root.findtext("ObjectName")
+        self.ObjectId = root.findtext("ObjectId")
+        self.Write = root.findtext("Write")
+        self.OlapInfo = root.findtext("OlapInfo") == "true"
+        self.Collations = [collation.text for collation in root.findall("Collations/Collation")]
+        self.Languages = [int(lang.text) for lang in root.findall("Languages/Language")]
+        self.FileGroups = [FileGroup.from_xml(filegroup) for filegroup in root.findall("FileGroups/FileGroup")]
+class FileGroup:
+    @classmethod
+    def from_xml(cls, element):
+        fileGroup = cls()
+        fileGroup.Class = int(element.findtext("Class"))
+        fileGroup.ID = element.findtext("ID")
+        fileGroup.Name = element.findtext("Name")
+        fileGroup.ObjectVersion = int(element.findtext("ObjectVersion"))
+        fileGroup.PersistLocation = int(element.findtext("PersistLocation"))
+        fileGroup.PersistLocationPath = element.findtext("PersistLocationPath")
+        fileGroup.StorageLocationPath = element.findtext("StorageLocationPath")
+        fileGroup.ObjectID = element.findtext("ObjectID")
+        fileGroup.FileList = [BackupFile.from_xml(backupfile) for backupfile in element.findall("FileList/BackupFile")]
+        return fileGroup
+class BackupFile:
+    @classmethod
+    def from_xml(cls, element):
+        backupFile = cls()
+        backupFile.Path = element.findtext("Path")
+        backupFile.StoragePath = element.findtext("StoragePath")
+        backupFile.LastWriteTime = int(element.findtext("LastWriteTime"))
+        backupFile.Size = int(element.findtext("Size"))
+        return backupFile

pbixray-0.1.1/pbixray/abf/backup_log_header.py ADDED Viewed

@@ -0,0 +1,24 @@
+import xml.etree.ElementTree as ET
+class BackupLogHeader:
+    def __init__(self, xml_string):
+        # header is always one page (4096 bytes) in size and padded with zeros between the last header element and the end of the page
+        trimmed_xml_string = xml_string.decode('utf-16').rstrip('\x00')
+        root = ET.fromstring(trimmed_xml_string)
+        self.BackupRestoreSyncVersion = int(root.findtext("BackupRestoreSyncVersion"))
+        self.Fault = root.findtext("Fault") == "true"
+        self.faultcode = int(root.findtext("faultcode"))
+        self.ErrorCode = root.findtext("ErrorCode") == "true"
+        self.EncryptionFlag = root.findtext("EncryptionFlag") == "true"
+        self.EncryptionKey = int(root.findtext("EncryptionKey"))
+        self.ApplyCompression = root.findtext("ApplyCompression") == "true"
+        self.m_cbOffsetHeader = int(root.findtext("m_cbOffsetHeader"))
+        self.DataSize = int(root.findtext("DataSize"))
+        self.Files = int(root.findtext("Files"))
+        self.ObjectID = root.findtext("ObjectID")
+        self.m_cbOffsetData = int(root.findtext("m_cbOffsetData"))
+    @classmethod
+    def from_xml_string(cls, xml_string):
+        trimmed_xml_string = xml_string.decode('utf-16').rstrip('\x00')
+        return cls(trimmed_xml_string)

pbixray-0.1.1/pbixray/abf/data_model.py ADDED Viewed

@@ -0,0 +1,8 @@
+from dataclasses import dataclass
+@dataclass
+class DataModel:
+    """Bundle of the decompressed model buffer and the values AbfParser derives from it."""
+    # Listing of files found in the buffer; AbfParser replaces this with a list of dicts
+    file_log: list
+    # The decompressed buffer that AbfParser slices to read the header, directory and log
+    decompressed_data: bytes
+    # Overwritten by AbfParser with the header's ErrorCode flag
+    error_code: bool = False
+    # Overwritten by AbfParser with the header's ApplyCompression flag
+    apply_compression: bool = False

pbixray-0.1.1/pbixray/abf/parser.py ADDED Viewed

@@ -0,0 +1,65 @@
+import xml.etree.ElementTree as ET
+from .backup_log import BackupLog
+from .backup_log_header import BackupLogHeader
+from .virtual_directory import VirtualDirectory
+from .data_model import DataModel
+class AbfParser:
+    def __init__(self, data_model:DataModel):
+        self.data_model = data_model
+        self.__buffer = data_model.decompressed_data
+        self.__backup_log_header = None
+        self.__virtual_directory = None
+        self.__backup_log = None
+        self.file_log = None  # public attribute
+        # Trigger the parsing process upon initialization
+        self.__parse_all()
+    def __parse_all(self):
+        self.__parse_backup_log_header()
+        self.__parse_virtual_directory()
+        self.__parse_backup_log()
+        self.__match_logs_and_get_attributes()
+    def __parse_backup_log_header(self):
+        offset = 72 # STREAM_STORAGE_SIGNATURE_)!@#$%^&*(
+        page = 0x1000 # header is always one page (4096 bytes) in size
+        self.__backup_log_header = BackupLogHeader(self.__buffer[offset:page])
+    def __parse_virtual_directory(self):
+        offset = int(self.__backup_log_header.m_cbOffsetHeader)
+        size = int(self.__backup_log_header.DataSize)
+        self.__virtual_directory = VirtualDirectory(self.__buffer[offset: offset+size])
+    def __parse_backup_log(self):
+        log = self.__virtual_directory.BackupFiles[-1]
+        offset = int(log.m_cbOffsetHeader)
+        size = int(log.Size)
+        self.data_model.error_code = self.__backup_log_header.ErrorCode
+        self.data_model.apply_compression = self.__backup_log_header.ApplyCompression
+        self.__backup_log = BackupLog(self.__buffer[offset:offset+size], self.__backup_log_header.ErrorCode)
+    def __match_logs_and_get_attributes(self):
+        persist_root = self.__backup_log.FileGroups[1].PersistLocationPath + '\\'
+        virtual_directory_files_by_path = {file.Path: file for file in self.__virtual_directory.BackupFiles}
+        matched_data = []
+        for file_group in self.__backup_log.FileGroups:
+            for backup_file in file_group.FileList:
+                if backup_file.StoragePath in virtual_directory_files_by_path:
+                    matched_file = virtual_directory_files_by_path[backup_file.StoragePath]
+                    if backup_file.Path.startswith(persist_root):
+                        path_without_persist_root = backup_file.Path.replace(persist_root, '', 1)
+                    else:
+                        path_without_persist_root = backup_file.Path
+                    matched_data.append({
+                        'Path': path_without_persist_root,
+                        'FileName': path_without_persist_root.split('\\')[-1],
+                        'StoragePath': backup_file.StoragePath,
+                        'Size': matched_file.Size,
+                        'SizeFromLog': backup_file.Size,
+                        'm_cbOffsetHeader': matched_file.m_cbOffsetHeader
+                    })
+        self.data_model.file_log = matched_data

pbixray-0.1.1/pbixray/abf/virtual_directory.py ADDED Viewed

@@ -0,0 +1,20 @@
+import xml.etree.ElementTree as ET
+class VirtualDirectoryBackupFile:
+    def __init__(self, element):
+        self.Path = element.findtext("Path")
+        self.Size = int(element.findtext("Size"))
+        self.m_cbOffsetHeader = int(element.findtext("m_cbOffsetHeader"))
+        self.Delete = element.findtext("Delete") == "true"
+        self.CreatedTimestamp = int(element.findtext("CreatedTimestamp"))
+        self.Access = int(element.findtext("Access"))
+        self.LastWriteTime = int(element.findtext("LastWriteTime"))
+class VirtualDirectory:
+    def __init__(self, xml_string):
+        root = ET.fromstring(xml_string)
+        self.BackupFiles = [VirtualDirectoryBackupFile(e) for e in root.findall("BackupFile")]
+    @classmethod
+    def from_xml_string(cls, xml_string):
+        return cls(xml_string)

pbixray-0.1.1/pbixray/column_data/__init__.py ADDED Viewed

File without changes

pbixray-0.1.1/pbixray/column_data/dictionary.py ADDED Viewed

@@ -0,0 +1,246 @@
+# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
+import kaitaistruct
+from kaitaistruct import KaitaiStruct, KaitaiStream, BytesIO
+from enum import Enum
+if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 9):
+    raise Exception("Incompatible Kaitai Struct Python API: 0.9 or later is required, but you have %s" % (kaitaistruct.__version__))
+class ColumnDataDictionary(KaitaiStruct):
+    class DictionaryTypes(Enum):
+        xm_type_invalid = -1
+        xm_type_long = 0
+        xm_type_real = 1
+        xm_type_string = 2
+    def __init__(self, _io, _parent=None, _root=None):
+        self._io = _io
+        self._parent = _parent
+        self._root = _root if _root else self
+        self._read()
+    def _read(self):
+        self.dictionary_type = KaitaiStream.resolve_enum(ColumnDataDictionary.DictionaryTypes, self._io.read_s4le())
+        self.hash_information = ColumnDataDictionary.HashInfo(self._io, self, self._root)
+        _on = self.dictionary_type
+        if _on == ColumnDataDictionary.DictionaryTypes.xm_type_string:
+            self.data = ColumnDataDictionary.StringData(self._io, self, self._root)
+        elif _on == ColumnDataDictionary.DictionaryTypes.xm_type_long:
+            self.data = ColumnDataDictionary.NumberData(self._io, self, self._root)
+        elif _on == ColumnDataDictionary.DictionaryTypes.xm_type_real:
+            self.data = ColumnDataDictionary.NumberData(self._io, self, self._root)
+    class StringRecordHandle(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.bit_or_byte_offset = self._io.read_u4le()
+            self.page_id = self._io.read_u4le()
+    class StringData(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.page_layout_information = ColumnDataDictionary.PageLayout(self._io, self, self._root)
+            self.dictionary_pages = []
+            for i in range(self.page_layout_information.store_page_count):
+                self.dictionary_pages.append(ColumnDataDictionary.DictionaryPage(self._io, self, self._root))
+            self.dictionary_record_handles_vector_info = ColumnDataDictionary.DictionaryRecordHandlesVector(self._io, self, self._root)
+    class HashInfo(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.hash_elements = []
+            for i in range(6):
+                self.hash_elements.append(self._io.read_s4le())
+    class VectorOfVectors(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.element_count = self._io.read_u8le()
+            self.element_size = self._io.read_u4le()
+            self.values = []
+            for i in range(self.element_count):
+                _on = self.data_type_id
+                if _on == u"int32":
+                    self.values.append(self._io.read_s4le())
+                elif _on == u"int64":
+                    self.values.append(self._io.read_s8le())
+                elif _on == u"float64":
+                    self.values.append(self._io.read_f8le())
+        @property
+        def is_int32(self):
+            if hasattr(self, '_m_is_int32'):
+                return self._m_is_int32
+            self._m_is_int32 = self.element_size == 4
+            return getattr(self, '_m_is_int32', None)
+        @property
+        def is_int64(self):
+            if hasattr(self, '_m_is_int64'):
+                return self._m_is_int64
+            self._m_is_int64 =  ((self.element_size == 8) and (self._root.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_long))
+            return getattr(self, '_m_is_int64', None)
+        @property
+        def is_float64(self):
+            if hasattr(self, '_m_is_float64'):
+                return self._m_is_float64
+            self._m_is_float64 =  ((self.element_size == 8) and (self._root.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_real))
+            return getattr(self, '_m_is_float64', None)
+        @property
+        def data_type_id(self):
+            if hasattr(self, '_m_data_type_id'):
+                return self._m_data_type_id
+            self._m_data_type_id = (u"int32" if self.is_int32 else (u"int64" if self.is_int64 else u"float64"))
+            return getattr(self, '_m_data_type_id', None)
+    class CompressedStrings(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.store_total_bits = self._io.read_u4le()
+            self.character_set_type_identifier = self._io.read_u4le()
+            self.allocation_size = self._io.read_u8le()
+            self.character_set_used = self._io.read_u1()
+            self.ui_decode_bits = self._io.read_u4le()
+            self.encode_array = []
+            for i in range(128):
+                self.encode_array.append(self._io.read_u1())
+            self.ui64_buffer_size = self._io.read_u8le()
+            self.compressed_string_buffer = self._io.read_bytes(self.allocation_size)
+    class PageLayout(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.store_string_count = self._io.read_s8le()
+            self.f_store_compressed = self._io.read_s1()
+            self.store_longest_string = self._io.read_s8le()
+            self.store_page_count = self._io.read_s8le()
+    class DictionaryPage(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.page_mask = self._io.read_u8le()
+            self.page_contains_nulls = self._io.read_u1()
+            self.page_start_index = self._io.read_u8le()
+            self.page_string_count = self._io.read_u8le()
+            self.page_compressed = self._io.read_u1()
+            self.string_store_begin_mark = self._io.read_bytes(4)
+            if not self.string_store_begin_mark == b"\xDD\xCC\xBB\xAA":
+                raise kaitaistruct.ValidationNotEqualError(b"\xDD\xCC\xBB\xAA", self.string_store_begin_mark, self._io, u"/types/dictionary_page/seq/5")
+            _on = self.page_compressed
+            if _on == 0:
+                self.string_store = ColumnDataDictionary.UncompressedStrings(self._io, self, self._root)
+            elif _on == 1:
+                self.string_store = ColumnDataDictionary.CompressedStrings(self._io, self, self._root)
+            self.string_store_end_mark = self._io.read_bytes(4)
+            if not self.string_store_end_mark == b"\xCD\xAB\xCD\xAB":
+                raise kaitaistruct.ValidationNotEqualError(b"\xCD\xAB\xCD\xAB", self.string_store_end_mark, self._io, u"/types/dictionary_page/seq/7")
+    class OtherRecordHandle(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.bit_or_byte_offset = self._io.read_u4le()
+    class UncompressedStrings(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.remaining_store_available = self._io.read_u8le()
+            self.buffer_used_characters = self._io.read_u8le()
+            self.allocation_size = self._io.read_u8le()
+            self.uncompressed_character_buffer = (self._io.read_bytes(self.allocation_size)).decode(u"UTF-16LE")
+    class NumberData(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.vector_of_vectors_info = ColumnDataDictionary.VectorOfVectors(self._io, self, self._root)
+    class DictionaryRecordHandlesVector(KaitaiStruct):
+        def __init__(self, _io, _parent=None, _root=None):
+            self._io = _io
+            self._parent = _parent
+            self._root = _root if _root else self
+            self._read()
+        def _read(self):
+            self.element_count = self._io.read_u8le()
+            self.element_size = self._io.read_bytes(4)
+            if not self.element_size == b"\x08\x00\x00\x00":
+                raise kaitaistruct.ValidationNotEqualError(b"\x08\x00\x00\x00", self.element_size, self._io, u"/types/dictionary_record_handles_vector/seq/1")
+            self.vector_of_record_handle_structures = []
+            for i in range(self.element_count):
+                self.vector_of_record_handle_structures.append(ColumnDataDictionary.StringRecordHandle(self._io, self, self._root))