polars-sgt 0.2.5__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/CHANGELOG.md +19 -0
  2. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/Cargo.lock +1 -1
  3. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/Cargo.toml +1 -1
  4. polars_sgt-0.3.0/PKG-INFO +216 -0
  5. polars_sgt-0.3.0/README.md +195 -0
  6. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/functions.py +115 -87
  7. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/src/sgt_transform.rs +172 -59
  8. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/test_benchmark.py +1 -1
  9. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/test_sgt_transform.py +44 -0
  10. polars_sgt-0.2.5/PKG-INFO +0 -226
  11. polars_sgt-0.2.5/README.md +0 -205
  12. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/.github/workflows/CI.yml +0 -0
  13. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/.gitignore +0 -0
  14. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/.python-version +0 -0
  15. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/.readthedocs.yaml +0 -0
  16. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/CODE_OF_CONDUCT.md +0 -0
  17. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/LICENSE +0 -0
  18. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/Makefile +0 -0
  19. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/assets/.DS_Store +0 -0
  20. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/assets/polars-business.png +0 -0
  21. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/bump_version.py +0 -0
  22. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/docs/API.rst +0 -0
  23. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/docs/Makefile +0 -0
  24. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/docs/conf.py +0 -0
  25. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/docs/index.rst +0 -0
  26. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/docs/installation.rst +0 -0
  27. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/docs/requirements-docs.txt +0 -0
  28. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/dprint.json +0 -0
  29. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/licenses/NUMPY_LICENSE.txt +0 -0
  30. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/licenses/PANDAS_LICENSE.txt +0 -0
  31. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/.mypy.ini +0 -0
  32. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/__init__.py +0 -0
  33. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/_internal.pyi +0 -0
  34. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/namespace.py +0 -0
  35. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/py.typed +0 -0
  36. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/ranges.py +0 -0
  37. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/typing.py +0 -0
  38. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/polars_sgt/utils.py +0 -0
  39. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/pyproject.toml +0 -0
  40. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/requirements.txt +0 -0
  41. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/rust-toolchain.toml +0 -0
  42. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/src/arg_previous_greater.rs +0 -0
  43. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/src/expressions.rs +0 -0
  44. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/src/format_localized.rs +0 -0
  45. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/src/lib.rs +0 -0
  46. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/src/month_delta.rs +0 -0
  47. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/src/timezone.rs +0 -0
  48. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/src/to_julian.rs +0 -0
  49. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/__init__.py +0 -0
  50. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/ceil_test.py +0 -0
  51. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/julian_date_test.py +0 -0
  52. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/test_date_range.py +0 -0
  53. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/test_format_localized.py +0 -0
  54. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/test_is_busday.py +0 -0
  55. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/test_month_delta.py +0 -0
  56. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/test_timezone.py +0 -0
  57. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/tests/verify_sgt.py +0 -0
  58. {polars_sgt-0.2.5 → polars_sgt-0.3.0}/uv.lock +0 -0
@@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.3.0] - 2026-02-04
9
+
10
+ ### Changed
11
+ - **Major Performance Optimization**: Optimized SGT transform for billion-row scale with O(1) time-weight lookups via cumulative product prefix arrays.
12
+ - **Speed Improvements**: Optimized Rust implementation with fast `exp`/`pow` approximations, pre-allocated buffers, and elimination of post-sort overhead.
13
+ - **Enhanced `sgt_transform_df`**:
14
+ - Returns a single merged wide-format DataFrame by default.
15
+ - Automatically prefixes feature names with group values (e.g., `sgt_buy_login`).
16
+ - Uses efficient reduce-join for merging multi-group analysis.
17
+ - Full support for Polars' LazyFrame and streaming engine.
18
+
19
+ ### Fixed
20
+ - **Time Weight Correctness**: Fixed weight calculation for `kappa > 2` to correctly accumulate time penalties across *all* individual transitions in an n-gram.
21
+ - **Numerical Stability**: Implemented periodic renormalization and zero-trap protection for weighted products to prevent underflow in very long sequences.
22
+
23
+ ### Added
24
+ - Comprehensive README documentation with spotlights on high-level APIs, scalability, and grouped analysis usage.
25
+
26
+
8
27
  ## [0.2.5] - 2026-02-04
9
28
 
10
29
  ### Added
@@ -2010,7 +2010,7 @@ dependencies = [
2010
2010
 
2011
2011
  [[package]]
2012
2012
  name = "polars_sgt"
2013
- version = "0.2.5"
2013
+ version = "0.3.0"
2014
2014
  dependencies = [
2015
2015
  "chrono",
2016
2016
  "chrono-tz",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars_sgt"
3
- version = "0.2.5"
3
+ version = "0.3.0"
4
4
  edition = "2021"
5
5
  authors = ["Zedd <lytran14789@gmail.com>", "Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>"]
6
6
  readme = "README.md"
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.4
2
+ Name: polars-sgt
3
+ Version: 0.3.0
4
+ Classifier: Programming Language :: Rust
5
+ Classifier: Programming Language :: Python :: Implementation :: CPython
6
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
7
+ Requires-Dist: maturin>=1.11.5
8
+ Requires-Dist: polars>=1.36.1
9
+ Requires-Dist: pytest>=8.4.2
10
+ Requires-Dist: tqdm>=4.66.0
11
+ License-File: LICENSE
12
+ Summary: Sequence Graph Transform (SGT) for Polars - Transform sequential data into weighted n-gram representations
13
+ Author-email: Zedd <lytran14789@gmail.com>, Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
14
+ Requires-Python: >=3.9
15
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
16
+ Project-URL: Change Log, https://github.com/4ursmile/polars-sgt/releases
17
+ Project-URL: Documentation, https://github.com/4ursmile/polars-sgt
18
+ Project-URL: Issue Tracker, https://github.com/4ursmile/polars-sgt/issues
19
+ Project-URL: Repository, https://github.com/4ursmile/polars-sgt
20
+
21
+ # polars-sgt
22
+
23
+ ## Sequence Graph Transform for Polars
24
+
25
+ [![PyPI version](https://badge.fury.io/py/polars-sgt.svg)](https://badge.fury.io/py/polars-sgt)
26
+
27
+ Transform sequential data into powerful n-gram representations with [Polars](https://www.pola.rs/).
28
+
29
+ **polars-sgt** brings Sequence Graph Transform (SGT) to Polars, enabling you to:
30
+ - ✅ **Transform** sequences into weighted n-gram features
31
+ - ✅ **Grouped Analysis**: Apply SGT across subsets (e.g., by direction, metric) and merge into a single wide DataFrame
32
+ - ✅ **Billion-Row Scale**: Optimized Rust implementation with O(1) time weight lookups
33
+ - ✅ **Temporal Dynamics**: Capture patterns with multiple decay functions across all n-gram transitions
34
+ - ✅ **Flexible**: Support for datetime, date, duration, and numeric time columns
35
+ - ✅ **Lazy & Parallel**: Fully compatible with Polars lazy evaluation and Rayon-backed parallel processing
36
+
37
+ ## What is SGT?
38
+
39
+ Sequence Graph Transform converts sequential data (like user clickstreams, sensor readings, or transaction histories) into weighted n-gram representations. Unlike traditional n-grams, SGT captures:
40
+
41
+ - **Sequential patterns**: Multi-transition dependencies (unigrams, bigrams, trigrams, ...).
42
+ - **Temporal dynamics**: Weights decay based on time gaps between events.
43
+ - **Normalized features**: L1/L2 normalization for machine-learning-ready feature spaces.
44
+
45
+ ---
46
+
47
+ ## Performance at Scale
48
+
49
+ Optimized for processing billions of rows:
50
+ - **O(1) Weight Calculation**: Uses cumulative product prefix arrays to calculate multi-transition time weights in constant time.
51
+ - **Zero-Cost Abstraction**: Written in Rust with Rayon for automatic multi-core utilization.
52
+ - **Memory Efficient**: Leverages Polars' Arrow-backed memory management.
53
+
54
+ ---
55
+
56
+ ## Installation
57
+
58
+ ```console
59
+ pip install polars-sgt
60
+ ```
61
+
62
+ ## Quick Start
63
+
64
+ ### 1. High-Level API: `sgt_transform_df`
65
+
66
+ The `sgt_transform_df` function is the easiest way to generate SGT features. It handles unnesting, exploding, and pivoting into a wide format automatically.
67
+
68
+ #### Single Group (Default)
69
+ ```python
70
+ import polars as pl
71
+ import polars_sgt as sgt
72
+
73
+ df = pl.DataFrame({
74
+ "user_id": ["A", "A", "A", "B", "B"],
75
+ "action": ["login", "view", "purchase", "login", "view"],
76
+ "time": [1, 2, 10, 1, 5],
77
+ })
78
+
79
+ # Generate wide-format features merged into one DataFrame
80
+ features = sgt.sgt_transform_df(
81
+ df,
82
+ sequence_id_col="user_id",
83
+ state_col="action",
84
+ time_col="time",
85
+ kappa=2
86
+ )
87
+ ```
88
+
89
+ #### Grouped Sequence Analysis
90
+ Calculate separate SGT features for different groups (e.g., event types or directions) and merge them into one wide DataFrame.
91
+
92
+ ```python
93
+ # Calculate SGT features for each 'direction' and 'metric' group (`df` must also contain these two columns)
94
+ result = sgt.sgt_transform_df(
95
+ df,
96
+ sequence_id_col="user_id",
97
+ state_col="action",
98
+ time_col="time",
99
+ group_cols=["direction", "metric"],
100
+ kappa=3,
101
+ time_penalty="exponential",
102
+ alpha=0.7,
103
+ group_name="analysis"
104
+ )
105
+ # Columns: ['user_id', 'analysis-buy-p_login', 'analysis-sell-p_login', ...]
106
+ ```
107
+
108
+ ### 2. Expression API: `sgt_transform`
109
+
110
+ For more control or integration into complex pipelines, use the expression-based API.
111
+
112
+ ```python
113
+ # Basic expression usage (returns a struct)
114
+ result = df.select(
115
+ sgt.sgt_transform(
116
+ "user_id",
117
+ "action",
118
+ time_col="time",
119
+ kappa=2,
120
+ time_penalty="exponential",
121
+ alpha=0.1,
122
+ mode="l1"
123
+ ).alias("sgt_features")
124
+ )
125
+
126
+ # Extract and explode
127
+ features = result.select([
128
+ pl.col("sgt_features").struct.field("sequence_id"),
129
+ pl.col("sgt_features").struct.field("ngram_keys").alias("ngrams"),
130
+ pl.col("sgt_features").struct.field("value").alias("weights"),
131
+ ]).explode(["ngrams", "weights"])
132
+ ```
133
+
134
+ ### With DateTime Columns
135
+
136
+ ```python
137
+ from datetime import datetime
138
+
139
+ df = pl.DataFrame({
140
+ "session_id": ["A", "A", "A", "A"],
141
+ "event": ["start", "click", "scroll", "exit"],
142
+ "time": [
143
+ datetime(2024, 1, 1, 10, 0),
144
+ datetime(2024, 1, 1, 10, 5),
145
+ datetime(2024, 1, 1, 10, 7),
146
+ datetime(2024, 1, 1, 10, 15),
147
+ ],
148
+ })
149
+
150
+ result = df.select(
151
+ sgt.sgt_transform(
152
+ "session_id",
153
+ "event",
154
+ time_col="time",
155
+ deltatime="m", # unit: minutes
156
+ kappa=3,
157
+ )
158
+ )
159
+ ```
160
+
161
+ ### Lazy Evaluation & Streaming
162
+
163
+ ```python
164
+ result = (
165
+ pl.scan_csv("large_sequences.csv")
166
+ .with_columns(pl.col("timestamp").str.to_datetime())
167
+ .select(
168
+ sgt.sgt_transform(
169
+ "user_id",
170
+ "action",
171
+ time_col="timestamp",
172
+ kappa=2,
173
+ deltatime="h",
174
+ )
175
+ )
176
+ .collect(engine="streaming")
177
+ )
178
+ ```
179
+
180
+ ---
181
+
182
+ ## API Reference
183
+
184
+ ### `sgt.sgt_transform_df`
185
+ The recommended high-level entry point. Returns a wide-format DataFrame.
186
+
187
+ - `df`: Input DataFrame or LazyFrame.
188
+ - `sequence_id_col`: Column(s) identifying sequences.
189
+ - `state_col`: Column containing states/events.
190
+ - `time_col`: Optional timestamp column.
191
+ - `group_cols`: Optional column(s) to group by before SGT.
192
+ - `kappa`: Maximum n-gram size.
193
+ - `mode`: Normalization (`"l1"`, `"l2"`, `"none"`).
194
+ - `time_penalty`: Decay function (`"inverse"`, `"exponential"`, `"linear"`, `"power"`, `"none"`).
195
+
196
+ ### `sgt.sgt_transform` (Expression)
197
+ Returns a struct with `sequence_id`, `ngram_keys`, and `value`.
198
+
199
+ ```python
200
+ df.select(
201
+ sgt.sgt_transform("user", "action", kappa=2).alias("sgt")
202
+ ).unnest("sgt")
203
+ ```
204
+
205
+ ---
206
+
207
+ ## Author & Acknowledgments
208
+
209
+ **Author:** Zedd (lytran14789@gmail.com)
210
+
211
+ **Special Thanks:** Built upon [polars-xdt](https://github.com/MarcoGorelli/polars-xdt) by [Marco Gorelli](https://github.com/MarcoGorelli).
212
+
213
+ ## License
214
+
215
+ MIT
216
+
@@ -0,0 +1,195 @@
1
+ # polars-sgt
2
+
3
+ ## Sequence Graph Transform for Polars
4
+
5
+ [![PyPI version](https://badge.fury.io/py/polars-sgt.svg)](https://badge.fury.io/py/polars-sgt)
6
+
7
+ Transform sequential data into powerful n-gram representations with [Polars](https://www.pola.rs/).
8
+
9
+ **polars-sgt** brings Sequence Graph Transform (SGT) to Polars, enabling you to:
10
+ - ✅ **Transform** sequences into weighted n-gram features
11
+ - ✅ **Grouped Analysis**: Apply SGT across subsets (e.g., by direction, metric) and merge into a single wide DataFrame
12
+ - ✅ **Billion-Row Scale**: Optimized Rust implementation with O(1) time weight lookups
13
+ - ✅ **Temporal Dynamics**: Capture patterns with multiple decay functions across all n-gram transitions
14
+ - ✅ **Flexible**: Support for datetime, date, duration, and numeric time columns
15
+ - ✅ **Lazy & Parallel**: Fully compatible with Polars lazy evaluation and Rayon-backed parallel processing
16
+
17
+ ## What is SGT?
18
+
19
+ Sequence Graph Transform converts sequential data (like user clickstreams, sensor readings, or transaction histories) into weighted n-gram representations. Unlike traditional n-grams, SGT captures:
20
+
21
+ - **Sequential patterns**: Multi-transition dependencies (unigrams, bigrams, trigrams, ...).
22
+ - **Temporal dynamics**: Weights decay based on time gaps between events.
23
+ - **Normalized features**: L1/L2 normalization for machine-learning-ready feature spaces.
24
+
25
+ ---
26
+
27
+ ## Performance at Scale
28
+
29
+ Optimized for processing billions of rows:
30
+ - **O(1) Weight Calculation**: Uses cumulative product prefix arrays to calculate multi-transition time weights in constant time.
31
+ - **Zero-Cost Abstraction**: Written in Rust with Rayon for automatic multi-core utilization.
32
+ - **Memory Efficient**: Leverages Polars' Arrow-backed memory management.
33
+
34
+ ---
35
+
36
+ ## Installation
37
+
38
+ ```console
39
+ pip install polars-sgt
40
+ ```
41
+
42
+ ## Quick Start
43
+
44
+ ### 1. High-Level API: `sgt_transform_df`
45
+
46
+ The `sgt_transform_df` function is the easiest way to generate SGT features. It handles unnesting, exploding, and pivoting into a wide format automatically.
47
+
48
+ #### Single Group (Default)
49
+ ```python
50
+ import polars as pl
51
+ import polars_sgt as sgt
52
+
53
+ df = pl.DataFrame({
54
+ "user_id": ["A", "A", "A", "B", "B"],
55
+ "action": ["login", "view", "purchase", "login", "view"],
56
+ "time": [1, 2, 10, 1, 5],
57
+ })
58
+
59
+ # Generate wide-format features merged into one DataFrame
60
+ features = sgt.sgt_transform_df(
61
+ df,
62
+ sequence_id_col="user_id",
63
+ state_col="action",
64
+ time_col="time",
65
+ kappa=2
66
+ )
67
+ ```
68
+
69
+ #### Grouped Sequence Analysis
70
+ Calculate separate SGT features for different groups (e.g., event types or directions) and merge them into one wide DataFrame.
71
+
72
+ ```python
73
+ # Calculate SGT features for each 'direction' and 'metric' group (`df` must also contain these two columns)
74
+ result = sgt.sgt_transform_df(
75
+ df,
76
+ sequence_id_col="user_id",
77
+ state_col="action",
78
+ time_col="time",
79
+ group_cols=["direction", "metric"],
80
+ kappa=3,
81
+ time_penalty="exponential",
82
+ alpha=0.7,
83
+ group_name="analysis"
84
+ )
85
+ # Columns: ['user_id', 'analysis-buy-p_login', 'analysis-sell-p_login', ...]
86
+ ```
87
+
88
+ ### 2. Expression API: `sgt_transform`
89
+
90
+ For more control or integration into complex pipelines, use the expression-based API.
91
+
92
+ ```python
93
+ # Basic expression usage (returns a struct)
94
+ result = df.select(
95
+ sgt.sgt_transform(
96
+ "user_id",
97
+ "action",
98
+ time_col="time",
99
+ kappa=2,
100
+ time_penalty="exponential",
101
+ alpha=0.1,
102
+ mode="l1"
103
+ ).alias("sgt_features")
104
+ )
105
+
106
+ # Extract and explode
107
+ features = result.select([
108
+ pl.col("sgt_features").struct.field("sequence_id"),
109
+ pl.col("sgt_features").struct.field("ngram_keys").alias("ngrams"),
110
+ pl.col("sgt_features").struct.field("value").alias("weights"),
111
+ ]).explode(["ngrams", "weights"])
112
+ ```
113
+
114
+ ### With DateTime Columns
115
+
116
+ ```python
117
+ from datetime import datetime
118
+
119
+ df = pl.DataFrame({
120
+ "session_id": ["A", "A", "A", "A"],
121
+ "event": ["start", "click", "scroll", "exit"],
122
+ "time": [
123
+ datetime(2024, 1, 1, 10, 0),
124
+ datetime(2024, 1, 1, 10, 5),
125
+ datetime(2024, 1, 1, 10, 7),
126
+ datetime(2024, 1, 1, 10, 15),
127
+ ],
128
+ })
129
+
130
+ result = df.select(
131
+ sgt.sgt_transform(
132
+ "session_id",
133
+ "event",
134
+ time_col="time",
135
+ deltatime="m", # unit: minutes
136
+ kappa=3,
137
+ )
138
+ )
139
+ ```
140
+
141
+ ### Lazy Evaluation & Streaming
142
+
143
+ ```python
144
+ result = (
145
+ pl.scan_csv("large_sequences.csv")
146
+ .with_columns(pl.col("timestamp").str.to_datetime())
147
+ .select(
148
+ sgt.sgt_transform(
149
+ "user_id",
150
+ "action",
151
+ time_col="timestamp",
152
+ kappa=2,
153
+ deltatime="h",
154
+ )
155
+ )
156
+ .collect(engine="streaming")
157
+ )
158
+ ```
159
+
160
+ ---
161
+
162
+ ## API Reference
163
+
164
+ ### `sgt.sgt_transform_df`
165
+ The recommended high-level entry point. Returns a wide-format DataFrame.
166
+
167
+ - `df`: Input DataFrame or LazyFrame.
168
+ - `sequence_id_col`: Column(s) identifying sequences.
169
+ - `state_col`: Column containing states/events.
170
+ - `time_col`: Optional timestamp column.
171
+ - `group_cols`: Optional column(s) to group by before SGT.
172
+ - `kappa`: Maximum n-gram size.
173
+ - `mode`: Normalization (`"l1"`, `"l2"`, `"none"`).
174
+ - `time_penalty`: Decay function (`"inverse"`, `"exponential"`, `"linear"`, `"power"`, `"none"`).
175
+
176
+ ### `sgt.sgt_transform` (Expression)
177
+ Returns a struct with `sequence_id`, `ngram_keys`, and `value`.
178
+
179
+ ```python
180
+ df.select(
181
+ sgt.sgt_transform("user", "action", kappa=2).alias("sgt")
182
+ ).unnest("sgt")
183
+ ```
184
+
185
+ ---
186
+
187
+ ## Author & Acknowledgments
188
+
189
+ **Author:** Zedd (lytran14789@gmail.com)
190
+
191
+ **Special Thanks:** Built upon [polars-xdt](https://github.com/MarcoGorelli/polars-xdt) by [Marco Gorelli](https://github.com/MarcoGorelli).
192
+
193
+ ## License
194
+
195
+ MIT