PyPI - LZGraphs - Versions diffs - 1.2.0__tar.gz → 2.1.0__tar.gz - Mend

LZGraphs 1.2.0.tar.gz → 2.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

{lzgraphs-1.2.0 → lzgraphs-2.1.0}/CHANGELOG.md RENAMED Viewed

@@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Custom exceptions module with comprehensive exception hierarchy for better error handling
-- Information-theoretic metrics module (`LZGraphs.Metrics.entropy`)
+- Information-theoretic metrics module (`LZGraphs.metrics.entropy`)
   - `node_entropy()` - Shannon entropy of node probability distribution
   - `edge_entropy()` - Shannon entropy of edge transition probabilities
   - `graph_entropy()` - Combined graph entropy measure

{lzgraphs-1.2.0 → lzgraphs-2.1.0}/PKG-INFO RENAMED Viewed

@@ -1,10 +1,7 @@
 Metadata-Version: 2.4
 Name: LZGraphs
-Version: 1.2.0
+Version: 2.1.0
 Summary: An Implementation of LZ76 Based Graphs for Repertoire Representation and Analysis
-Home-page: https://github.com/MuteJester/LZGraphs
-Download-URL: https://github.com/MuteJester/LZGraphs/archive/refs/tags/Beta1.1.1.tar.gz
-Author: Thomas Konstantinovsky
 Author-email: Thomas Konstantinovsky <thomaskon90@gmail.com>
 Maintainer-email: Thomas Konstantinovsky <thomaskon90@gmail.com>
 License: MIT
@@ -27,16 +24,17 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Operating System :: OS Independent
 Classifier: Typing :: Typed
-Requires-Python: >=3.8, <4
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: networkx>=3.0
 Requires-Dist: numpy>=1.24
 Requires-Dist: pandas>=1.5
 Requires-Dist: tqdm>=4.65
-Requires-Dist: matplotlib>=3.7
-Requires-Dist: seaborn>=0.12
 Requires-Dist: scipy>=1.10
+Provides-Extra: viz
+Requires-Dist: matplotlib>=3.7; extra == "viz"
+Requires-Dist: seaborn>=0.12; extra == "viz"
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0; extra == "dev"
@@ -51,11 +49,7 @@ Requires-Dist: mkdocs>=1.5; extra == "docs"
 Requires-Dist: mkdocs-material>=9.0; extra == "docs"
 Requires-Dist: mkdocstrings[python]>=0.24; extra == "docs"
 Requires-Dist: pymdown-extensions>=10.0; extra == "docs"
-Dynamic: author
-Dynamic: download-url
-Dynamic: home-page
 Dynamic: license-file
-Dynamic: requires-python
 <p align="center">

{lzgraphs-1.2.0 → lzgraphs-2.1.0}/pyproject.toml RENAMED Viewed

@@ -47,12 +47,14 @@ dependencies = [
     "numpy>=1.24",
     "pandas>=1.5",
     "tqdm>=4.65",
-    "matplotlib>=3.7",
-    "seaborn>=0.12",
     "scipy>=1.10",
 ]
 [project.optional-dependencies]
+viz = [
+    "matplotlib>=3.7",
+    "seaborn>=0.12",
+]
 dev = [
     "pytest>=7.0",
     "pytest-cov>=4.0",
@@ -79,7 +81,7 @@ Changelog = "https://github.com/MuteJester/LZGraphs/blob/master/CHANGELOG.md"
 [tool.setuptools]
 package-dir = {"" = "src"}
-packages = ["LZGraphs", "LZGraphs.Graphs", "LZGraphs.Metrics", "LZGraphs.Utilities", "LZGraphs.Mixins", "LZGraphs.BagOfWords", "LZGraphs.Visualization", "LZGraphs.Exceptions"]
+packages = ["LZGraphs", "LZGraphs.graphs", "LZGraphs.metrics", "LZGraphs.utilities", "LZGraphs.mixins", "LZGraphs.bag_of_words", "LZGraphs.visualization", "LZGraphs.exceptions"]
 [tool.setuptools.dynamic]
 version = {attr = "LZGraphs.__version__"}

lzgraphs-2.1.0/requirements.txt ADDED Viewed

@@ -0,0 +1,5 @@
+networkx>=3.0
+numpy>=1.24
+pandas>=1.5
+tqdm>=4.65
+scipy>=1.10

lzgraphs-2.1.0/src/LZGraphs/__init__.py ADDED Viewed

@@ -0,0 +1,195 @@
+__version__ = "2.1.0"
+# =============================================================================
+# Graph classes
+# =============================================================================
+from .graphs.amino_acid_positional import AAPLZGraph
+from .graphs.nucleotide_double_positional import NDPLZGraph
+from .graphs.naive import NaiveLZGraph
+# =============================================================================
+# Graph operations
+# =============================================================================
+from .graphs.graph_operations import graph_union
+# =============================================================================
+# Bag of Words
+# =============================================================================
+from .bag_of_words.bow_encoder import LZBOW
+# =============================================================================
+# Metrics - Diversity
+# =============================================================================
+from .metrics.diversity import (
+    LZCentrality,
+    K_Diversity,
+    K100_Diversity,
+    K500_Diversity,
+    K1000_Diversity,
+    K5000_Diversity,
+    adaptive_K_Diversity,
+)
+# =============================================================================
+# Metrics - Entropy / Information Theory
+# =============================================================================
+from .metrics.entropy import (
+    node_entropy,
+    edge_entropy,
+    graph_entropy,
+    normalized_graph_entropy,
+    sequence_perplexity,
+    repertoire_perplexity,
+    jensen_shannon_divergence,
+    cross_entropy,
+    kl_divergence,
+    mutual_information_genes,
+    transition_predictability,
+    graph_compression_ratio,
+    repertoire_compressibility_index,
+    transition_kl_divergence,
+    transition_jsd,
+    transition_mutual_information_profile,
+    path_entropy_rate,
+)
+# =============================================================================
+# Metrics - Saturation
+# =============================================================================
+from .metrics.saturation import NodeEdgeSaturationProbe
+# =============================================================================
+# Metrics - Convenience
+# =============================================================================
+from .metrics.convenience import compare_repertoires
+# =============================================================================
+# Utilities
+# =============================================================================
+from .utilities.helpers import generate_kmer_dictionary
+from .utilities.decomposition import lempel_ziv_decomposition
+# =============================================================================
+# Visualization (optional dependency)
+# =============================================================================
+try:
+    from .visualization.visualize import (
+        sequence_genomic_edges_variability_plot,
+        sequence_genomic_node_variability_plot,
+        sequence_possible_paths_plot,
+        ancestors_descendants_curves_plot,
+        draw_graph,
+    )
+except ImportError:
+    pass  # Visualization features not available without matplotlib/seaborn
+# =============================================================================
+# Exceptions
+# =============================================================================
+from .exceptions import (
+    # Base
+    LZGraphError,
+    # Input validation
+    InputValidationError,
+    EmptyDataError,
+    MissingColumnError,
+    InvalidSequenceError,
+    InvalidProbabilityError,
+    # Graph construction
+    GraphConstructionError,
+    EncodingError,
+    # Gene data
+    GeneDataError,
+    NoGeneDataError,
+    GeneAnnotationError,
+    # Walk/probability
+    WalkError,
+    NoValidPathError,
+    MissingNodeError,
+    MissingEdgeError,
+    # Serialization
+    SerializationError,
+    UnsupportedFormatError,
+    CorruptedFileError,
+    # BOW
+    BOWError,
+    EncodingFunctionMismatchError,
+    UnfittedBOWError,
+    # Graph operations
+    GraphOperationError,
+    IncompatibleGraphsError,
+    # Metrics
+    MetricsError,
+    InsufficientDataError,
+)
+__all__ = [
+    # Graph classes
+    'AAPLZGraph',
+    'NDPLZGraph',
+    'NaiveLZGraph',
+    # Graph operations
+    'graph_union',
+    # Bag of Words
+    'LZBOW',
+    # Diversity metrics
+    'LZCentrality',
+    'K_Diversity',
+    'K100_Diversity',
+    'K500_Diversity',
+    'K1000_Diversity',
+    'K5000_Diversity',
+    'adaptive_K_Diversity',
+    # Entropy metrics
+    'node_entropy',
+    'edge_entropy',
+    'graph_entropy',
+    'normalized_graph_entropy',
+    'sequence_perplexity',
+    'repertoire_perplexity',
+    'jensen_shannon_divergence',
+    'cross_entropy',
+    'kl_divergence',
+    'mutual_information_genes',
+    'transition_predictability',
+    'graph_compression_ratio',
+    'repertoire_compressibility_index',
+    'transition_kl_divergence',
+    'transition_jsd',
+    'transition_mutual_information_profile',
+    'path_entropy_rate',
+    # Saturation
+    'NodeEdgeSaturationProbe',
+    # Convenience
+    'compare_repertoires',
+    # Utilities
+    'generate_kmer_dictionary',
+    'lempel_ziv_decomposition',
+    # Exceptions
+    'LZGraphError',
+    'InputValidationError',
+    'EmptyDataError',
+    'MissingColumnError',
+    'InvalidSequenceError',
+    'InvalidProbabilityError',
+    'GraphConstructionError',
+    'EncodingError',
+    'GeneDataError',
+    'NoGeneDataError',
+    'GeneAnnotationError',
+    'WalkError',
+    'NoValidPathError',
+    'MissingNodeError',
+    'MissingEdgeError',
+    'SerializationError',
+    'UnsupportedFormatError',
+    'CorruptedFileError',
+    'BOWError',
+    'EncodingFunctionMismatchError',
+    'UnfittedBOWError',
+    'GraphOperationError',
+    'IncompatibleGraphsError',
+    'MetricsError',
+    'InsufficientDataError',
+]

lzgraphs-2.1.0/src/LZGraphs/bag_of_words/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .bow_encoder import LZBOW
+__all__ = ['LZBOW']

lzgraphs-1.2.0/src/LZGraphs/BagOfWords/BOWEncoder.py → lzgraphs-2.1.0/src/LZGraphs/bag_of_words/bow_encoder.py RENAMED Viewed

@@ -3,8 +3,10 @@ from collections.abc import Iterable
 import numpy as np
 from tqdm.auto import tqdm
-from ..Utilities.decomposition import lempel_ziv_decomposition
-from ..Exceptions import EncodingFunctionMismatchError
+from ..utilities.decomposition import lempel_ziv_decomposition
+from ..exceptions import EncodingFunctionMismatchError
+__all__ = ["LZBOW"]
 class LZBOW:
@@ -44,13 +46,17 @@ class LZBOW:
         self.dictionary_index_map = dict()
         self.dictionary_index_inverse_map = dict()
+    def __repr__(self):
+        return (f"LZBOW(dictionary_size={self.dictionary_size}, "
+                f"observed_sequences={self.observed_sequences})")
     def _derive_index_maps(self):
         self.dictionary_index_map = {pattern: idx for idx, pattern in enumerate(self.dictionary)}
         self.dictionary_index_inverse_map = {idx: pattern for idx, pattern in enumerate(self.dictionary)}
         self.dictionary_size = len(self.dictionary)
     def fit(self, data):
-        if type(data) == str:
+        if isinstance(data, str):
             encoded = self.encoding_function(data)
             self.dictionary = self.dictionary | set(encoded)
             self._derive_index_maps()
@@ -66,18 +72,29 @@ class LZBOW:
         encoded = self.encoding_function(seq)
         return [self.dictionary_index_map[i] for i in encoded if i in self.dictionary]
-    def transform(self, data, normalize=False):
-        if type(data) == str:
+    def transform(self, data, normalize=False, per_sequence=False):
+        if isinstance(data, str):
             result = np.zeros(self.dictionary_size)
             result[self._seq_to_index(data)] += 1
             return result
         elif isinstance(data, Iterable):
-            result = np.zeros(self.dictionary_size)
-            for seq in tqdm(data, leave=False, position=0):
-                result[self._seq_to_index(seq)] += 1
-            if normalize:
-                return result / result.sum()
+            if per_sequence:
+                data_list = list(data)
+                matrix = np.zeros((len(data_list), self.dictionary_size))
+                for i, seq in enumerate(tqdm(data_list, leave=False, position=0)):
+                    matrix[i, self._seq_to_index(seq)] += 1
+                if normalize:
+                    row_sums = matrix.sum(axis=1, keepdims=True)
+                    row_sums[row_sums == 0] = 1  # avoid division by zero
+                    return matrix / row_sums
+                return matrix
             else:
+                result = np.zeros(self.dictionary_size)
+                for seq in tqdm(data, leave=False, position=0):
+                    result[self._seq_to_index(seq)] += 1
+                if normalize:
+                    total = result.sum()
+                    return result / total if total > 0 else result
                 return result
     def load_from(self, other):
@@ -100,5 +117,69 @@ class LZBOW:
         union.observed_sequences = self.observed_sequences + other.observed_sequences
         union.dictionary_index_map = {pattern: idx for idx, pattern in enumerate(union.dictionary)}
         union.dictionary_index_inverse_map = {idx: pattern for idx, pattern in enumerate(union.dictionary)}
-        union.dictionary_size = len(self.dictionary)
+        union.dictionary_size = len(union.dictionary)
         return union
+    def fit_transform(self, data, normalize=False, per_sequence=False):
+        """
+        Fit the encoder on data and transform it in one step.
+        Equivalent to calling fit(data) followed by transform(data), but
+        avoids processing the data twice for fitting.
+        Args:
+            data: A string (single sequence) or iterable of strings.
+            normalize (bool): If True, normalize the output vectors.
+            per_sequence (bool): If True and data is iterable, return a
+                2D matrix (n_sequences x dictionary_size).
+        Returns:
+            np.ndarray: BOW vector(s) for the input data.
+        Example:
+            >>> bow = LZBOW()
+            >>> matrix = bow.fit_transform(sequences, per_sequence=True)
+        """
+        self.fit(data)
+        return self.transform(data, normalize=normalize, per_sequence=per_sequence)
+    def tfidf_transform(self, data):
+        """
+        Transform sequences into TF-IDF weighted bag-of-words vectors.
+        TF-IDF (Term Frequency - Inverse Document Frequency) weights
+        down-weight subpatterns that appear in many sequences and up-weight
+        those that are more discriminative.
+        The encoder must be fitted before calling this method.
+        Args:
+            data: An iterable of sequence strings.
+        Returns:
+            np.ndarray: 2D matrix (n_sequences x dictionary_size) with TF-IDF weights.
+        Example:
+            >>> bow = LZBOW()
+            >>> bow.fit(train_sequences)
+            >>> tfidf_matrix = bow.tfidf_transform(test_sequences)
+        """
+        # Get per-sequence term frequency matrix
+        tf_matrix = self.transform(data, per_sequence=True)
+        n_docs = tf_matrix.shape[0]
+        if n_docs == 0:
+            return tf_matrix
+        # Compute document frequency: number of sequences containing each term
+        doc_freq = np.count_nonzero(tf_matrix, axis=0).astype(np.float64)
+        # IDF = log(1 + N / (1 + df)), smoothed variant that's always non-negative
+        idf = np.log1p(n_docs / (1.0 + doc_freq))
+        # Normalize TF per row (L1 normalization)
+        row_sums = tf_matrix.sum(axis=1, keepdims=True)
+        row_sums[row_sums == 0] = 1  # avoid division by zero
+        tf_normalized = tf_matrix / row_sums
+        return tf_normalized * idf

{lzgraphs-1.2.0/src/LZGraphs/Exceptions → lzgraphs-2.1.0/src/LZGraphs/exceptions}/__init__.py RENAMED Viewed

@@ -27,7 +27,7 @@ Exception Hierarchy:
         └── IncompatibleGraphsError
 Example:
-    >>> from LZGraphs.Exceptions import NoGeneDataError, InvalidSequenceError
+    >>> from LZGraphs.exceptions import NoGeneDataError, InvalidSequenceError
     >>> try:
     ...     graph.genomic_random_walk()
     ... except NoGeneDataError as e:

lzgraphs-2.1.0/src/LZGraphs/graphs/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .amino_acid_positional import AAPLZGraph
+from .nucleotide_double_positional import NDPLZGraph
+from .naive import NaiveLZGraph
+from .graph_operations import graph_union
+__all__ = ['AAPLZGraph', 'NDPLZGraph', 'NaiveLZGraph', 'graph_union']

LZGraphs 1.2.0__tar.gz → 2.1.0__tar.gz

LZGraphs 1.2.0.tar.gz → 2.1.0.tar.gz