openm3u8 7.0.0__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3122 @@
1
+ /*
2
+ * Copyright 2014 Globo.com Player authors. All rights reserved.
3
+ * Modifications Copyright (c) 2026 Wurl.
4
+ * Use of this source code is governed by a MIT License
5
+ * license that can be found in the LICENSE file.
6
+ *
7
+ * C extension for m3u8 parser - provides optimized parsing of M3U8 playlists.
8
+ *
9
+ * This module implements the same parsing logic as m3u8/parser.py but in C
10
+ * for improved performance. The output is designed to be identical to the
11
+ * Python implementation.
12
+ *
13
+ * Design notes (following CPython extension best practices per PEP 7):
14
+ *
15
+ * Memory Management:
16
+ * - Uses module state instead of static globals for subinterpreter safety
17
+ * (PEP 573, PEP 3121)
18
+ * - Uses PyMem_* allocators consistently for better debugging/tracing
19
+ * - Single cleanup path via goto for reliable resource management
20
+ * - All borrowed references are clearly documented
21
+ *
22
+ * Performance Optimizations:
23
+ * - Frequently-used dict keys are cached as interned strings
24
+ * - Attribute parsers are const static arrays built at compile time
25
+ * - String operations use restrict pointers where applicable
26
+ *
27
+ * Error Handling:
28
+ * - All Python C API calls that can fail are checked
29
+ * - Helper macros (DICT_SET_AND_DECREF) ensure consistent cleanup
30
+ * - ParseError exception is shared with the Python parser module
31
+ *
32
+ * Thread Safety:
33
+ * - No mutable static state; all state is per-module
34
+ * - GIL is held throughout parsing (no release/acquire)
35
+ */
36
+
37
+ #define PY_SSIZE_T_CLEAN
38
+ #include <Python.h>
39
+ #include <string.h>
40
+ #include <stdlib.h>
41
+ #include <ctype.h>
42
+ #include <math.h>
43
+
44
+ /*
45
+ * Whitespace/Case handling for protocol parsing.
46
+ *
47
+ * We intentionally treat playlist syntax as ASCII per RFC 8216. Using the C
48
+ * locale-dependent ctype tables (isspace/tolower) can cause surprising
49
+ * behavior changes depending on process locale and input bytes > 0x7F.
50
+ *
51
+ * Note: This does not attempt to mirror Python's full Unicode whitespace
52
+ * semantics for str.strip(); the HLS grammar is ASCII and the input here is
53
+ * parsed as UTF-8 bytes.
54
+ */
55
/* Return 1 when c is one of the six ASCII whitespace bytes, otherwise 0. */
static inline int
ascii_isspace(unsigned char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
    case '\v':
        return 1;
    default:
        return 0;
    }
}
61
+
62
/* Map 'A'..'Z' to 'a'..'z'; every other byte value is returned unchanged. */
static inline unsigned char
ascii_tolower(unsigned char c)
{
    const int is_upper = (c >= 'A') && (c <= 'Z');
    return is_upper ? (unsigned char)(c + ('a' - 'A')) : c;
}
70
+
71
/*
 * Compare a raw (not NUL-terminated) buffer against a NUL-terminated key.
 *
 * Each buffer byte is folded before comparison: '-' becomes '_' and ASCII
 * upper-case letters become lower-case.  The key is compared as-is, so it
 * is expected to already be in that normalized form.
 *
 * Args:
 *   buf: Start of the raw bytes (only the first `len` bytes are read)
 *   len: Number of bytes in buf
 *   key: NUL-terminated key to match
 *
 * Returns: 1 when the folded buffer equals key exactly (same length),
 *          0 otherwise.
 */
static inline int
buffer_matches_key(const char *buf, size_t len, const char *key)
{
    size_t pos = 0;

    while (pos < len) {
        const unsigned char expected = (unsigned char)key[pos];
        if (expected == '\0') {
            /* Key ran out before the buffer did. */
            return 0;
        }

        const unsigned char raw = (unsigned char)buf[pos];
        const unsigned char folded =
            (raw == '-') ? (unsigned char)'_' : ascii_tolower(raw);
        if (folded != expected) {
            return 0;
        }
        pos++;
    }

    /* The buffer is exhausted; the key must end here too. */
    return key[len] == '\0';
}
89
+
90
+ /*
91
+ * Compatibility shims for Py_NewRef/Py_XNewRef (added in Python 3.10).
92
+ * These make reference ownership more explicit at call sites.
93
+ */
94
+ #if PY_VERSION_HEX < 0x030a00f0
95
+ static inline PyObject *
96
+ Py_NewRef(PyObject *obj)
97
+ {
98
+ Py_INCREF(obj);
99
+ return obj;
100
+ }
101
+
102
+ static inline PyObject *
103
+ Py_XNewRef(PyObject *obj)
104
+ {
105
+ Py_XINCREF(obj);
106
+ return obj;
107
+ }
108
+ #endif
109
+
110
+ /*
111
+ * Forward declarations for inline helpers used before their definitions.
112
+ */
113
+ static inline int dict_set_interned(PyObject *dict, PyObject *interned_key, PyObject *value);
114
+ static inline PyObject *dict_get_interned(PyObject *dict, PyObject *interned_key);
115
+
116
/*
 * Helper macro: dict[key] = value, consuming the caller's reference.
 *
 *   dict:          dict object to update
 *   key:           NUL-terminated C string key (PyDict_SetItemString)
 *   value:         owned reference; always released by this macro
 *   cleanup_label: label to jump to on failure
 *
 * `value` is evaluated exactly once, so it is safe to pass an expression
 * that creates the object (e.g. a constructor call).  If it evaluates to
 * NULL the macro jumps to cleanup_label without touching the dict; the
 * failing expression is expected to have set the exception.
 */
#define DICT_SET_AND_DECREF(dict, key, value, cleanup_label) \
    do { \
        PyObject *dsad_value_ = (value); \
        int dsad_rc_; \
        if (dsad_value_ == NULL) { \
            goto cleanup_label; \
        } \
        dsad_rc_ = PyDict_SetItemString((dict), (key), dsad_value_); \
        Py_DECREF(dsad_value_); \
        if (dsad_rc_ < 0) { \
            goto cleanup_label; \
        } \
    } while (0)
128
+
129
+ /* Protocol tag definitions - must match protocol.py */
130
+ #define EXT_M3U "#EXTM3U"
131
+ #define EXT_X_TARGETDURATION "#EXT-X-TARGETDURATION"
132
+ #define EXT_X_MEDIA_SEQUENCE "#EXT-X-MEDIA-SEQUENCE"
133
+ #define EXT_X_DISCONTINUITY_SEQUENCE "#EXT-X-DISCONTINUITY-SEQUENCE"
134
+ #define EXT_X_PROGRAM_DATE_TIME "#EXT-X-PROGRAM-DATE-TIME"
135
+ #define EXT_X_MEDIA "#EXT-X-MEDIA"
136
+ #define EXT_X_PLAYLIST_TYPE "#EXT-X-PLAYLIST-TYPE"
137
+ #define EXT_X_KEY "#EXT-X-KEY"
138
+ #define EXT_X_STREAM_INF "#EXT-X-STREAM-INF"
139
+ #define EXT_X_VERSION "#EXT-X-VERSION"
140
+ #define EXT_X_ALLOW_CACHE "#EXT-X-ALLOW-CACHE"
141
+ #define EXT_X_ENDLIST "#EXT-X-ENDLIST"
142
+ #define EXTINF "#EXTINF"
143
+ #define EXT_I_FRAMES_ONLY "#EXT-X-I-FRAMES-ONLY"
144
+ #define EXT_X_ASSET "#EXT-X-ASSET"
145
+ #define EXT_X_BITRATE "#EXT-X-BITRATE"
146
+ #define EXT_X_BYTERANGE "#EXT-X-BYTERANGE"
147
+ #define EXT_X_I_FRAME_STREAM_INF "#EXT-X-I-FRAME-STREAM-INF"
148
+ #define EXT_X_DISCONTINUITY "#EXT-X-DISCONTINUITY"
149
+ #define EXT_X_CUE_OUT "#EXT-X-CUE-OUT"
150
+ #define EXT_X_CUE_OUT_CONT "#EXT-X-CUE-OUT-CONT"
151
+ #define EXT_X_CUE_IN "#EXT-X-CUE-IN"
152
+ #define EXT_X_CUE_SPAN "#EXT-X-CUE-SPAN"
153
+ #define EXT_OATCLS_SCTE35 "#EXT-OATCLS-SCTE35"
154
+ #define EXT_IS_INDEPENDENT_SEGMENTS "#EXT-X-INDEPENDENT-SEGMENTS"
155
+ #define EXT_X_MAP "#EXT-X-MAP"
156
+ #define EXT_X_START "#EXT-X-START"
157
+ #define EXT_X_SERVER_CONTROL "#EXT-X-SERVER-CONTROL"
158
+ #define EXT_X_PART_INF "#EXT-X-PART-INF"
159
+ #define EXT_X_PART "#EXT-X-PART"
160
+ #define EXT_X_RENDITION_REPORT "#EXT-X-RENDITION-REPORT"
161
+ #define EXT_X_SKIP "#EXT-X-SKIP"
162
+ #define EXT_X_SESSION_DATA "#EXT-X-SESSION-DATA"
163
+ #define EXT_X_SESSION_KEY "#EXT-X-SESSION-KEY"
164
+ #define EXT_X_PRELOAD_HINT "#EXT-X-PRELOAD-HINT"
165
+ #define EXT_X_DATERANGE "#EXT-X-DATERANGE"
166
+ #define EXT_X_GAP "#EXT-X-GAP"
167
+ #define EXT_X_CONTENT_STEERING "#EXT-X-CONTENT-STEERING"
168
+ #define EXT_X_IMAGE_STREAM_INF "#EXT-X-IMAGE-STREAM-INF"
169
+ #define EXT_X_IMAGES_ONLY "#EXT-X-IMAGES-ONLY"
170
+ #define EXT_X_TILES "#EXT-X-TILES"
171
+ #define EXT_X_BLACKOUT "#EXT-X-BLACKOUT"
172
+
173
+ /*
174
+ * X-macro for interned strings.
175
+ *
176
+ * This eliminates 4x duplication: struct fields, init, traverse, clear.
177
+ * Define once, expand everywhere with different operations.
178
+ *
179
+ * Format: X(field_name, string_value)
180
+ */
181
+ #define INTERNED_STRINGS(X) \
182
+ /* Core parsing keys */ \
183
+ X(str_segment, "segment") \
184
+ X(str_segments, "segments") \
185
+ X(str_duration, "duration") \
186
+ X(str_uri, "uri") \
187
+ X(str_title, "title") \
188
+ X(str_expect_segment, "expect_segment") \
189
+ X(str_expect_playlist, "expect_playlist") \
190
+ X(str_current_key, "current_key") \
191
+ X(str_keys, "keys") \
192
+ X(str_cue_out, "cue_out") \
193
+ X(str_cue_in, "cue_in") \
194
+ /* Segment/state keys */ \
195
+ X(str_program_date_time, "program_date_time") \
196
+ X(str_current_program_date_time, "current_program_date_time") \
197
+ X(str_cue_out_start, "cue_out_start") \
198
+ X(str_cue_out_explicitly_duration, "cue_out_explicitly_duration") \
199
+ X(str_current_cue_out_scte35, "current_cue_out_scte35") \
200
+ X(str_current_cue_out_oatcls_scte35, "current_cue_out_oatcls_scte35") \
201
+ X(str_current_cue_out_duration, "current_cue_out_duration") \
202
+ X(str_current_cue_out_elapsedtime, "current_cue_out_elapsedtime") \
203
+ X(str_scte35, "scte35") \
204
+ X(str_oatcls_scte35, "oatcls_scte35") \
205
+ X(str_scte35_duration, "scte35_duration") \
206
+ X(str_scte35_elapsedtime, "scte35_elapsedtime") \
207
+ X(str_asset_metadata, "asset_metadata") \
208
+ X(str_discontinuity, "discontinuity") \
209
+ X(str_key, "key") \
210
+ X(str_current_segment_map, "current_segment_map") \
211
+ X(str_init_section, "init_section") \
212
+ X(str_dateranges, "dateranges") \
213
+ X(str_gap, "gap") \
214
+ X(str_gap_tag, "gap_tag") \
215
+ X(str_blackout, "blackout") \
216
+ X(str_byterange, "byterange") \
217
+ X(str_bitrate, "bitrate") \
218
+ /* Data dict keys */ \
219
+ X(str_playlists, "playlists") \
220
+ X(str_iframe_playlists, "iframe_playlists") \
221
+ X(str_image_playlists, "image_playlists") \
222
+ X(str_tiles, "tiles") \
223
+ X(str_media, "media") \
224
+ X(str_rendition_reports, "rendition_reports") \
225
+ X(str_session_data, "session_data") \
226
+ X(str_session_keys, "session_keys") \
227
+ X(str_segment_map, "segment_map") \
228
+ X(str_skip, "skip") \
229
+ X(str_part_inf, "part_inf") \
230
+ X(str_is_variant, "is_variant") \
231
+ X(str_is_endlist, "is_endlist") \
232
+ X(str_is_i_frames_only, "is_i_frames_only") \
233
+ X(str_is_independent_segments, "is_independent_segments") \
234
+ X(str_is_images_only, "is_images_only") \
235
+ X(str_playlist_type, "playlist_type") \
236
+ X(str_media_sequence, "media_sequence") \
237
+ X(str_targetduration, "targetduration") \
238
+ X(str_discontinuity_sequence, "discontinuity_sequence") \
239
+ X(str_version, "version") \
240
+ X(str_allow_cache, "allow_cache") \
241
+ X(str_start, "start") \
242
+ X(str_server_control, "server_control") \
243
+ X(str_preload_hint, "preload_hint") \
244
+ X(str_content_steering, "content_steering") \
245
+ X(str_stream_info, "stream_info") \
246
+ X(str_parts, "parts") \
247
+ X(str_iframe_stream_info, "iframe_stream_info") \
248
+ X(str_image_stream_info, "image_stream_info")
249
+
250
+ /*
251
+ * Module state - holds all per-module data.
252
+ *
253
+ * Using module state instead of static globals ensures:
254
+ * - Proper cleanup when the module is garbage collected
255
+ * - Compatibility with subinterpreters (PEP 573, PEP 3121)
256
+ * - Thread-safe access to cached objects
257
+ */
258
+ typedef struct {
259
+ PyObject *ParseError;
260
+ PyObject *datetime_cls;
261
+ PyObject *timedelta_cls;
262
+ PyObject *fromisoformat_meth;
263
+ /* Interned strings - generated from X-macro */
264
+ #define DECLARE_INTERNED(name, str) PyObject *name;
265
+ INTERNED_STRINGS(DECLARE_INTERNED)
266
+ #undef DECLARE_INTERNED
267
+ } m3u8_state;
268
+
269
+ /*
270
+ * Parse context - holds all state needed during a single parse() call.
271
+ *
272
+ * This structure reduces parameter passing between functions and makes
273
+ * the parsing state explicit. All PyObject pointers in this struct are
274
+ * borrowed references except where noted.
275
+ *
276
+ * Shadow State Optimization:
277
+ * Hot flags (expect_segment, expect_playlist) are kept in C variables
278
+ * to avoid dict lookup overhead in the main parsing loop. They are
279
+ * synced to the Python state dict only when needed:
280
+ * - Before calling custom_tags_parser (so callback sees current state)
281
+ * - After custom_tags_parser returns (in case it modified state)
282
+ * - At the end of parsing (for final state consistency)
283
+ */
284
+ typedef struct {
285
+ m3u8_state *mod_state; /* Module state (borrowed) */
286
+ PyObject *data; /* Result dict being built (owned) */
287
+ PyObject *state; /* Parser state dict (owned) */
288
+ int strict; /* Strict parsing mode flag */
289
+ int lineno; /* Current line number (1-based) */
290
+ /* Shadow state for hot flags - avoids dict lookups in main loop */
291
+ int expect_segment; /* Shadow of state["expect_segment"] */
292
+ int expect_playlist; /* Shadow of state["expect_playlist"] */
293
+ } ParseContext;
294
+
295
+ /*
296
+ * Unified tag handler function type.
297
+ *
298
+ * All tag handlers receive the same arguments for consistency and to enable
299
+ * the dispatch table pattern. This mirrors Python's **parse_kwargs approach.
300
+ *
301
+ * Args:
302
+ * ctx: Parse context (holds mod_state, data, state, strict, lineno)
303
+ * line: Null-terminated line content (the full line including tag)
304
+ *
305
+ * Returns:
306
+ * 0 on success, -1 on failure with exception set
307
+ */
308
+ typedef int (*TagHandler)(ParseContext *ctx, const char *line);
309
+
310
+ /*
311
+ * Dispatch table entry for tag-to-handler mapping.
312
+ *
313
+ * Using a dispatch table instead of a long if/else chain:
314
+ * - Matches Python's DISPATCH dict pattern
315
+ * - More maintainable and readable
316
+ * - Easier to add/remove tags
317
+ * - Linear scan is fast for <50 tags (comparable to dict lookup overhead)
318
+ */
319
+ typedef struct {
320
+ const char *tag; /* Tag string, e.g., "#EXTINF" */
321
+ size_t tag_len; /* Pre-computed length for fast prefix matching */
322
+ TagHandler handler; /* Handler function */
323
+ } TagDispatch;
324
+
325
+ /*
326
+ * Sync shadow state TO Python dict (before custom_tags_parser or at end).
327
+ */
328
+ static int
329
+ sync_shadow_to_dict(ParseContext *ctx)
330
+ {
331
+ m3u8_state *mod_state = ctx->mod_state;
332
+ if (dict_set_interned(ctx->state, mod_state->str_expect_segment,
333
+ ctx->expect_segment ? Py_True : Py_False) < 0) {
334
+ return -1;
335
+ }
336
+ if (dict_set_interned(ctx->state, mod_state->str_expect_playlist,
337
+ ctx->expect_playlist ? Py_True : Py_False) < 0) {
338
+ return -1;
339
+ }
340
+ return 0;
341
+ }
342
+
343
+ /*
344
+ * Sync shadow state FROM Python dict (after custom_tags_parser modifies it).
345
+ */
346
+ static void
347
+ sync_shadow_from_dict(ParseContext *ctx)
348
+ {
349
+ m3u8_state *mod_state = ctx->mod_state;
350
+ PyObject *val;
351
+
352
+ val = dict_get_interned(ctx->state, mod_state->str_expect_segment);
353
+ ctx->expect_segment = (val == Py_True);
354
+
355
+ val = dict_get_interned(ctx->state, mod_state->str_expect_playlist);
356
+ ctx->expect_playlist = (val == Py_True);
357
+ }
358
+
359
+ /* Forward declaration for module definition */
360
+ static struct PyModuleDef m3u8_parser_module;
361
+
362
+ /* Get module state from module object */
363
+ static inline m3u8_state *
364
+ get_m3u8_state(PyObject *module)
365
+ {
366
+ void *state = PyModule_GetState(module);
367
+ assert(state != NULL);
368
+ return (m3u8_state *)state;
369
+ }
370
+
371
+ /*
372
+ * Initialize datetime-related cached objects in module state.
373
+ * Called during module initialization.
374
+ * Returns 0 on success, -1 on failure with exception set.
375
+ */
376
+ static int
377
+ init_datetime_cache(m3u8_state *state)
378
+ {
379
+ PyObject *datetime_mod = PyImport_ImportModule("datetime");
380
+ if (datetime_mod == NULL) {
381
+ return -1;
382
+ }
383
+
384
+ state->datetime_cls = PyObject_GetAttrString(datetime_mod, "datetime");
385
+ state->timedelta_cls = PyObject_GetAttrString(datetime_mod, "timedelta");
386
+
387
+ if (state->datetime_cls != NULL) {
388
+ state->fromisoformat_meth = PyObject_GetAttrString(
389
+ state->datetime_cls, "fromisoformat");
390
+ }
391
+
392
+ Py_DECREF(datetime_mod);
393
+
394
+ if (state->datetime_cls == NULL ||
395
+ state->timedelta_cls == NULL ||
396
+ state->fromisoformat_meth == NULL)
397
+ {
398
+ Py_CLEAR(state->datetime_cls);
399
+ Py_CLEAR(state->timedelta_cls);
400
+ Py_CLEAR(state->fromisoformat_meth);
401
+ return -1;
402
+ }
403
+ return 0;
404
+ }
405
+
406
+ /*
407
+ * Initialize interned string cache using X-macro expansion.
408
+ * Returns 0 on success, -1 on failure with exception set.
409
+ */
410
+ static int
411
+ init_interned_strings(m3u8_state *state)
412
+ {
413
+ #define INIT_INTERNED(name, str) \
414
+ state->name = PyUnicode_InternFromString(str); \
415
+ if (state->name == NULL) return -1;
416
+ INTERNED_STRINGS(INIT_INTERNED)
417
+ #undef INIT_INTERNED
418
+ return 0;
419
+ }
420
+
421
+ /*
422
+ * Raise ParseError with lineno and line arguments.
423
+ * Takes module state to get the ParseError class.
424
+ *
425
+ * Optimization: Uses direct tuple construction instead of Py_BuildValue
426
+ * to avoid format string parsing overhead.
427
+ */
428
+ static void
429
+ raise_parse_error(m3u8_state *state, int lineno, const char *line)
430
+ {
431
+ /* Direct tuple construction - faster than Py_BuildValue("(is)", ...) */
432
+ PyObject *py_lineno = PyLong_FromLong(lineno);
433
+ if (py_lineno == NULL) {
434
+ return;
435
+ }
436
+
437
+ PyObject *py_line = PyUnicode_FromString(line);
438
+ if (py_line == NULL) {
439
+ Py_DECREF(py_lineno);
440
+ return;
441
+ }
442
+
443
+ PyObject *args = PyTuple_Pack(2, py_lineno, py_line);
444
+ Py_DECREF(py_lineno);
445
+ Py_DECREF(py_line);
446
+ if (args == NULL) {
447
+ return;
448
+ }
449
+
450
+ PyObject *exc = PyObject_Call(state->ParseError, args, NULL);
451
+ Py_DECREF(args);
452
+
453
+ if (exc != NULL) {
454
+ PyErr_SetObject(state->ParseError, exc);
455
+ Py_DECREF(exc);
456
+ }
457
+ }
458
+
459
+ /*
460
+ * remove_quotes(), implemented at the Python level as:
461
+ *
462
+ * quotes = ('"', "'")
463
+ * if string.startswith(quotes) and string.endswith(quotes):
464
+ * return string[1:-1]
465
+ *
466
+ * Note the subtlety: Python does NOT require matching quote characters.
467
+ * We mirror that behavior for parity.
468
+ *
469
+ * Returns: new reference.
470
+ */
471
+ static PyObject *
472
+ remove_quotes_py(PyObject *str)
473
+ {
474
+ if (!PyUnicode_Check(str)) {
475
+ PyErr_SetString(PyExc_TypeError, "expected str");
476
+ return NULL;
477
+ }
478
+
479
+ Py_ssize_t len = PyUnicode_GetLength(str);
480
+ if (len < 2) {
481
+ return Py_NewRef(str);
482
+ }
483
+
484
+ Py_UCS4 first = PyUnicode_ReadChar(str, 0);
485
+ if (first == (Py_UCS4)-1 && PyErr_Occurred()) {
486
+ return NULL;
487
+ }
488
+ Py_UCS4 last = PyUnicode_ReadChar(str, len - 1);
489
+ if (last == (Py_UCS4)-1 && PyErr_Occurred()) {
490
+ return NULL;
491
+ }
492
+
493
+ if ((first == '"' || first == '\'') && (last == '"' || last == '\'')) {
494
+ return PyUnicode_Substring(str, 1, len - 1);
495
+ }
496
+ return Py_NewRef(str);
497
+ }
498
+
499
/*
 * Trim ASCII whitespace from both ends of a NUL-terminated string, in place.
 *
 * Warning: trailing whitespace is removed by writing a NUL terminator into
 * the caller's buffer, so the string must be writable.
 *
 * Returns a pointer to the first non-whitespace character.  For an empty
 * or all-whitespace string this is the string's terminating NUL and the
 * buffer is not modified.
 */
static char *strip(char *str) {
    char *head = str;
    while (ascii_isspace((unsigned char)*head)) {
        head++;
    }
    if (*head == '\0') {
        return head;
    }

    /* tail starts at the terminator and walks back over trailing blanks. */
    char *tail = head + strlen(head);
    while (tail - 1 > head && ascii_isspace((unsigned char)tail[-1])) {
        tail--;
    }
    *tail = '\0';
    return head;
}
519
+
520
+ /*
521
+ * Fast dict operations using interned string keys.
522
+ *
523
+ * These avoid the string creation overhead of PyDict_SetItemString by
524
+ * using pre-interned strings from module state. PyDict_SetItem with
525
+ * interned strings can use pointer comparison for fast key lookup.
526
+ */
527
+
528
+ /* Set dict[key] = value using interned key. Returns 0 on success, -1 on error. */
529
+ static inline int
530
+ dict_set_interned(PyObject *dict, PyObject *interned_key, PyObject *value)
531
+ {
532
+ return PyDict_SetItem(dict, interned_key, value);
533
+ }
534
+
535
+ /* Get dict[key] using interned key. Returns borrowed ref or NULL. */
536
+ static inline PyObject *
537
+ dict_get_interned(PyObject *dict, PyObject *interned_key)
538
+ {
539
+ return PyDict_GetItem(dict, interned_key);
540
+ }
541
+
542
+ /*
543
+ * Get or create segment dict in state using interned string.
544
+ * Returns borrowed reference on success, NULL with exception on failure.
545
+ */
546
+ static PyObject *
547
+ get_or_create_segment(m3u8_state *mod_state, PyObject *state)
548
+ {
549
+ PyObject *segment = dict_get_interned(state, mod_state->str_segment);
550
+ if (segment != NULL) {
551
+ return segment; /* borrowed reference */
552
+ }
553
+ segment = PyDict_New();
554
+ if (segment == NULL) {
555
+ return NULL;
556
+ }
557
+ if (dict_set_interned(state, mod_state->str_segment, segment) < 0) {
558
+ Py_DECREF(segment);
559
+ return NULL;
560
+ }
561
+ Py_DECREF(segment);
562
+ return dict_get_interned(state, mod_state->str_segment);
563
+ }
564
+
565
+ /* Utility: build list like Python's content.strip().splitlines() (preserve internal blanks) */
566
+ static PyObject *build_stripped_splitlines(const char *content) {
567
+ const unsigned char *p = (const unsigned char *)content;
568
+ const unsigned char *end = p + strlen(content);
569
+
570
+ while (p < end && ascii_isspace(*p)) p++;
571
+ while (end > p && ascii_isspace(*(end - 1))) end--;
572
+
573
+ PyObject *lines = PyList_New(0);
574
+ if (!lines) return NULL;
575
+
576
+ const unsigned char *line_start = p;
577
+ while (p < end) {
578
+ if (*p == '\n' || *p == '\r') {
579
+ PyObject *line = PyUnicode_FromStringAndSize((const char *)line_start,
580
+ (Py_ssize_t)(p - line_start));
581
+ if (!line) {
582
+ Py_DECREF(lines);
583
+ return NULL;
584
+ }
585
+ if (PyList_Append(lines, line) < 0) {
586
+ Py_DECREF(line);
587
+ Py_DECREF(lines);
588
+ return NULL;
589
+ }
590
+ Py_DECREF(line);
591
+
592
+ /* Consume newline sequence */
593
+ if (*p == '\r' && (p + 1) < end && *(p + 1) == '\n') p++;
594
+ p++;
595
+ line_start = p;
596
+ continue;
597
+ }
598
+ p++;
599
+ }
600
+
601
+ /* Last line (even if empty) */
602
+ PyObject *line = PyUnicode_FromStringAndSize((const char *)line_start,
603
+ (Py_ssize_t)(end - line_start));
604
+ if (!line) {
605
+ Py_DECREF(lines);
606
+ return NULL;
607
+ }
608
+ if (PyList_Append(lines, line) < 0) {
609
+ Py_DECREF(line);
610
+ Py_DECREF(lines);
611
+ return NULL;
612
+ }
613
+ Py_DECREF(line);
614
+
615
+ return lines;
616
+ }
617
+
618
+ /*
619
+ * Helper to initialize multiple list fields using interned keys.
620
+ * Returns 0 on success, -1 on failure.
621
+ */
622
+ static int
623
+ init_list_fields(PyObject *data, PyObject **keys, size_t count)
624
+ {
625
+ for (size_t i = 0; i < count; i++) {
626
+ PyObject *list = PyList_New(0);
627
+ if (list == NULL) {
628
+ return -1;
629
+ }
630
+ if (dict_set_interned(data, keys[i], list) < 0) {
631
+ Py_DECREF(list);
632
+ return -1;
633
+ }
634
+ Py_DECREF(list);
635
+ }
636
+ return 0;
637
+ }
638
+
639
+ /*
640
+ * Helper to initialize multiple dict fields using interned keys.
641
+ * Returns 0 on success, -1 on failure.
642
+ */
643
+ static int
644
+ init_dict_fields(PyObject *data, PyObject **keys, size_t count)
645
+ {
646
+ for (size_t i = 0; i < count; i++) {
647
+ PyObject *dict = PyDict_New();
648
+ if (dict == NULL) {
649
+ return -1;
650
+ }
651
+ if (dict_set_interned(data, keys[i], dict) < 0) {
652
+ Py_DECREF(dict);
653
+ return -1;
654
+ }
655
+ Py_DECREF(dict);
656
+ }
657
+ return 0;
658
+ }
659
+
660
+ /*
661
+ * Initialize the result data dictionary with default values.
662
+ *
663
+ * This sets up all the required keys with their initial values,
664
+ * matching the structure created by the Python parser.
665
+ *
666
+ * Uses interned strings for faster dict operations (pointer comparison
667
+ * instead of string hashing on each SetItem).
668
+ *
669
+ * Returns: New reference to data dict on success, NULL on failure.
670
+ */
671
+ static PyObject *
672
+ init_parse_data(m3u8_state *ms)
673
+ {
674
+ PyObject *data = PyDict_New();
675
+ if (data == NULL) {
676
+ return NULL;
677
+ }
678
+
679
+ /* Set scalar defaults using interned keys */
680
+ PyObject *zero = PyLong_FromLong(0);
681
+ if (zero == NULL) goto fail;
682
+ if (dict_set_interned(data, ms->str_media_sequence, zero) < 0) {
683
+ Py_DECREF(zero);
684
+ goto fail;
685
+ }
686
+ Py_DECREF(zero);
687
+
688
+ if (dict_set_interned(data, ms->str_is_variant, Py_False) < 0) goto fail;
689
+ if (dict_set_interned(data, ms->str_is_endlist, Py_False) < 0) goto fail;
690
+ if (dict_set_interned(data, ms->str_is_i_frames_only, Py_False) < 0) goto fail;
691
+ if (dict_set_interned(data, ms->str_is_independent_segments, Py_False) < 0) goto fail;
692
+ if (dict_set_interned(data, ms->str_is_images_only, Py_False) < 0) goto fail;
693
+ if (dict_set_interned(data, ms->str_playlist_type, Py_None) < 0) goto fail;
694
+
695
+ /* Initialize list fields using interned keys */
696
+ PyObject *list_keys[] = {
697
+ ms->str_playlists,
698
+ ms->str_segments,
699
+ ms->str_iframe_playlists,
700
+ ms->str_image_playlists,
701
+ ms->str_tiles,
702
+ ms->str_media,
703
+ ms->str_keys,
704
+ ms->str_rendition_reports,
705
+ ms->str_session_data,
706
+ ms->str_session_keys,
707
+ ms->str_segment_map,
708
+ };
709
+ if (init_list_fields(data, list_keys, sizeof(list_keys) / sizeof(list_keys[0])) < 0) {
710
+ goto fail;
711
+ }
712
+
713
+ /* Initialize dict fields using interned keys */
714
+ PyObject *dict_keys[] = {
715
+ ms->str_skip,
716
+ ms->str_part_inf,
717
+ };
718
+ if (init_dict_fields(data, dict_keys, sizeof(dict_keys) / sizeof(dict_keys[0])) < 0) {
719
+ goto fail;
720
+ }
721
+
722
+ return data;
723
+
724
+ fail:
725
+ Py_DECREF(data);
726
+ return NULL;
727
+ }
728
+
729
+ /*
730
+ * Initialize the parser state dictionary.
731
+ *
732
+ * The state dict tracks parsing progress and carries values between
733
+ * tags (e.g., current key, segment being built, etc.).
734
+ *
735
+ * Returns: New reference to state dict on success, NULL on failure.
736
+ */
737
+ static PyObject *
738
+ init_parse_state(m3u8_state *mod_state)
739
+ {
740
+ PyObject *state = PyDict_New();
741
+ if (state == NULL) {
742
+ return NULL;
743
+ }
744
+
745
+ /* Use interned strings for commonly-accessed keys */
746
+ if (dict_set_interned(state, mod_state->str_expect_segment, Py_False) < 0) goto fail;
747
+ if (dict_set_interned(state, mod_state->str_expect_playlist, Py_False) < 0) goto fail;
748
+
749
+ return state;
750
+
751
+ fail:
752
+ Py_DECREF(state);
753
+ return NULL;
754
+ }
755
+
756
+ /*
757
+ * Add seconds to a datetime object: dt + timedelta(seconds=secs)
758
+ * Returns new reference on success, NULL with exception on failure.
759
+ */
760
+ static PyObject *
761
+ datetime_add_seconds(m3u8_state *state, PyObject *dt, double secs)
762
+ {
763
+ PyObject *args = PyTuple_New(0);
764
+ if (args == NULL) {
765
+ return NULL;
766
+ }
767
+
768
+ PyObject *kwargs = Py_BuildValue("{s:d}", "seconds", secs);
769
+ if (kwargs == NULL) {
770
+ Py_DECREF(args);
771
+ return NULL;
772
+ }
773
+
774
+ PyObject *delta = PyObject_Call(state->timedelta_cls, args, kwargs);
775
+ Py_DECREF(kwargs);
776
+ Py_DECREF(args);
777
+ if (delta == NULL) {
778
+ return NULL;
779
+ }
780
+
781
+ PyObject *new_dt = PyNumber_Add(dt, delta);
782
+ Py_DECREF(delta);
783
+ return new_dt;
784
+ }
785
+
786
+ /*
787
+ * Create normalized Python string directly from buffer (zero-copy optimization).
788
+ *
789
+ * This avoids malloc for keys < 64 chars (covers 99%+ of real-world cases).
790
+ * Normalization: replace '-' with '_', lowercase, strip whitespace.
791
+ *
792
+ * Returns: New reference to Python string, or NULL with exception set.
793
+ */
794
+ static PyObject *
795
+ create_normalized_key(const char *s, Py_ssize_t len)
796
+ {
797
+ char stack_buf[64];
798
+ char *buf = stack_buf;
799
+ int use_heap = (len >= (Py_ssize_t)sizeof(stack_buf));
800
+
801
+ if (use_heap) {
802
+ buf = PyMem_Malloc(len + 1);
803
+ if (buf == NULL) {
804
+ return PyErr_NoMemory();
805
+ }
806
+ }
807
+
808
+ /* Normalize: skip leading whitespace, replace - with _, tolower */
809
+ Py_ssize_t in_idx = 0;
810
+ Py_ssize_t out_len = 0;
811
+
812
+ /* Skip leading whitespace */
813
+ while (in_idx < len && ascii_isspace((unsigned char)s[in_idx])) {
814
+ in_idx++;
815
+ }
816
+
817
+ /* Transform characters */
818
+ for (; in_idx < len; in_idx++) {
819
+ unsigned char c = (unsigned char)s[in_idx];
820
+ if (c == '-') {
821
+ c = '_';
822
+ } else {
823
+ c = ascii_tolower(c);
824
+ }
825
+ buf[out_len++] = (char)c;
826
+ }
827
+
828
+ /* Strip trailing whitespace */
829
+ while (out_len > 0 && ascii_isspace((unsigned char)buf[out_len - 1])) {
830
+ out_len--;
831
+ }
832
+ buf[out_len] = '\0';
833
+
834
+ PyObject *res = PyUnicode_FromStringAndSize(buf, out_len);
835
+
836
+ if (use_heap) {
837
+ PyMem_Free(buf);
838
+ }
839
+ return res;
840
+ }
841
+
842
/* Note: quote removal is implemented by remove_quotes_py() above. */
843
+ /* Utility: delete a key from dict by interned key; ignore missing-key KeyError. */
844
+ static int
845
+ del_item_interned_ignore_keyerror(PyObject *dict, PyObject *interned_key)
846
+ {
847
+ if (PyDict_DelItem(dict, interned_key) == 0) {
848
+ return 0;
849
+ }
850
+ if (PyErr_ExceptionMatches(PyExc_KeyError)) {
851
+ PyErr_Clear();
852
+ return 0;
853
+ }
854
+ return -1;
855
+ }
856
+
857
+ /*
858
+ * Helper: Transfer boolean flag from state to segment.
859
+ *
860
+ * Sets segment[key] = True if state[key] exists, False otherwise.
861
+ * Deletes state[key] if it existed.
862
+ * Returns 0 on success, -1 on failure.
863
+ */
864
+ static int
865
+ transfer_state_bool(PyObject *state, PyObject *segment, PyObject *key)
866
+ {
867
+ PyObject *val = PyDict_GetItem(state, key); /* borrowed ref, no error */
868
+ if (PyDict_SetItem(segment, key, val ? Py_True : Py_False) < 0) return -1;
869
+ if (val && del_item_interned_ignore_keyerror(state, key) < 0) return -1;
870
+ return 0;
871
+ }
872
+
873
+ /*
874
+ * Helper: Transfer value from state to segment (or None if missing).
875
+ *
876
+ * Sets segment[key] = state[key] if exists, else segment[key] = None.
877
+ * Deletes state[key] if it existed.
878
+ * Returns 0 on success, -1 on failure.
879
+ */
880
+ static int
881
+ transfer_state_value(PyObject *state, PyObject *segment, PyObject *key)
882
+ {
883
+ PyObject *val = PyDict_GetItem(state, key); /* borrowed ref */
884
+ if (PyDict_SetItem(segment, key, val ? val : Py_None) < 0) return -1;
885
+ if (val && del_item_interned_ignore_keyerror(state, key) < 0) return -1;
886
+ return 0;
887
+ }
888
+
889
+ /*
890
+ * Zero-copy attribute list parser.
891
+ *
892
+ * Parses "KEY=value,KEY2=value2" format directly from buffer pointers.
893
+ * Creates Python objects directly without intermediate C string allocations.
894
+ *
895
+ * Args:
896
+ * start: Pointer to start of attribute list (after the ":" in the tag)
897
+ * end: Pointer to end of buffer
898
+ *
899
+ * Returns: New reference to dict, or NULL with exception set.
900
+ */
901
+ static PyObject *
902
+ parse_attribute_list_raw(const char *start, const char *end)
903
+ {
904
+ PyObject *attrs = PyDict_New();
905
+ if (attrs == NULL) {
906
+ return NULL;
907
+ }
908
+
909
+ const char *p = start;
910
+ while (p < end) {
911
+ /* Skip leading whitespace and commas */
912
+ while (p < end && (ascii_isspace((unsigned char)*p) || *p == ',')) {
913
+ p++;
914
+ }
915
+ if (p >= end) {
916
+ break;
917
+ }
918
+
919
+ /* Find key */
920
+ const char *key_start = p;
921
+ while (p < end && *p != '=' && *p != ',') {
922
+ p++;
923
+ }
924
+ const char *key_end = p;
925
+
926
+ /* Create normalized key directly from buffer */
927
+ PyObject *py_key = create_normalized_key(key_start, key_end - key_start);
928
+ if (py_key == NULL) {
929
+ Py_DECREF(attrs);
930
+ return NULL;
931
+ }
932
+
933
+ PyObject *py_val = NULL;
934
+
935
+ if (p < end && *p == '=') {
936
+ p++; /* Skip '=' */
937
+
938
+ if (p < end && (*p == '"' || *p == '\'')) {
939
+ /* Quoted string - include quotes in value for later processing */
940
+ char quote = *p;
941
+ const char *val_start = p; /* Include opening quote */
942
+ p++; /* Skip opening quote */
943
+ while (p < end && *p != quote) {
944
+ p++;
945
+ }
946
+ if (p < end) {
947
+ p++; /* Include closing quote */
948
+ }
949
+ /* Create string with quotes (for compatibility with typed parser) */
950
+ py_val = PyUnicode_FromStringAndSize(val_start, p - val_start);
951
+ } else {
952
+ /* Unquoted value */
953
+ const char *val_start = p;
954
+ while (p < end && *p != ',') {
955
+ p++;
956
+ }
957
+ /* Strip trailing whitespace from unquoted values */
958
+ const char *val_end = p;
959
+ while (val_end > val_start && ascii_isspace((unsigned char)*(val_end - 1))) {
960
+ val_end--;
961
+ }
962
+ py_val = PyUnicode_FromStringAndSize(val_start, val_end - val_start);
963
+ }
964
+ } else {
965
+ /* Key without value - store the key content as value with empty key */
966
+ /* This handles formats like "EXT-X-CUE-OUT-CONT:2.436/120" */
967
+ Py_ssize_t key_len = key_end - key_start;
968
+ /* Strip trailing whitespace */
969
+ while (key_len > 0 && ascii_isspace((unsigned char)key_start[key_len - 1])) {
970
+ key_len--;
971
+ }
972
+ py_val = PyUnicode_FromStringAndSize(key_start, key_len);
973
+ Py_DECREF(py_key);
974
+ py_key = PyUnicode_FromString("");
975
+ if (py_key == NULL) {
976
+ Py_XDECREF(py_val);
977
+ Py_DECREF(attrs);
978
+ return NULL;
979
+ }
980
+ }
981
+
982
+ if (py_val == NULL) {
983
+ Py_DECREF(py_key);
984
+ Py_DECREF(attrs);
985
+ return NULL;
986
+ }
987
+
988
+ if (PyDict_SetItem(attrs, py_key, py_val) < 0) {
989
+ Py_DECREF(py_key);
990
+ Py_DECREF(py_val);
991
+ Py_DECREF(attrs);
992
+ return NULL;
993
+ }
994
+
995
+ Py_DECREF(py_key);
996
+ Py_DECREF(py_val);
997
+ }
998
+
999
+ return attrs;
1000
+ }
1001
+
1002
+ /*
1003
+ * Parse attribute list from a line like "PREFIX:KEY=value,KEY2=value2"
1004
+ *
1005
+ * This is a wrapper around parse_attribute_list_raw that handles the
1006
+ * prefix-skipping logic for compatibility with existing callers.
1007
+ *
1008
+ * Returns new reference to dict on success, NULL with exception on failure.
1009
+ */
1010
+ static PyObject *
1011
+ parse_attribute_list(const char *line, const char *prefix)
1012
+ {
1013
+ /* Skip prefix if present */
1014
+ const char *content = line;
1015
+ if (prefix != NULL) {
1016
+ size_t prefix_len = strlen(prefix);
1017
+ if (strncmp(line, prefix, prefix_len) == 0) {
1018
+ content = line + prefix_len;
1019
+ if (*content == ':') {
1020
+ content++;
1021
+ }
1022
+ } else {
1023
+ /* Prefix not found - return empty dict */
1024
+ return PyDict_New();
1025
+ }
1026
+ }
1027
+
1028
+ /* Delegate to zero-copy implementation */
1029
+ return parse_attribute_list_raw(content, content + strlen(content));
1030
+ }
1031
+
1032
/*
 * Value kinds understood by the typed attribute-list parser.
 * Each schema entry (AttrParser) maps one normalized attribute name to the
 * kind of Python object its value should become.
 */
typedef enum {
    ATTR_STRING = 0,     /* stored as text exactly as written (quotes kept) */
    ATTR_INT,            /* base-10 integer */
    ATTR_FLOAT,          /* floating point number */
    ATTR_QUOTED_STRING,  /* text with the surrounding quotes removed */
    ATTR_BANDWIDTH       /* parsed as a double, then converted to an integer */
} AttrType;

/* One schema row: attribute name and the kind of value it carries. */
typedef struct {
    const char *name;  /* attribute name the schema lookup compares against */
    AttrType type;     /* conversion applied to the attribute's value */
} AttrParser;
1045
+
1046
+ /*
1047
+ * Schema-aware attribute parser.
1048
+ *
1049
+ * This is the optimized version that converts values to their final types
1050
+ * directly during parsing, avoiding the "double allocation" problem where
1051
+ * we first create a Python string, then convert it to int/float.
1052
+ *
1053
+ * The schema (parsers array) tells us the expected type for each key,
1054
+ * so we can parse directly to the correct Python type.
1055
+ *
1056
+ * Args:
1057
+ * start: Pointer to start of attribute list (after "TAG:")
1058
+ * end: Pointer to end of content
1059
+ * parsers: Array of AttrParser structs defining key->type mappings
1060
+ * num_parsers: Number of parsers in array
1061
+ *
1062
+ * Returns: New reference to dict on success, NULL with exception set.
1063
+ */
1064
+ static PyObject *
1065
+ parse_attributes_with_schema(const char *start, const char *end,
1066
+ const AttrParser *parsers, size_t num_parsers)
1067
+ {
1068
+ PyObject *attrs = PyDict_New();
1069
+ if (attrs == NULL) {
1070
+ return NULL;
1071
+ }
1072
+
1073
+ const char *p = start;
1074
+ while (p < end) {
1075
+ /* Skip leading whitespace and commas */
1076
+ while (p < end && (ascii_isspace((unsigned char)*p) || *p == ',')) {
1077
+ p++;
1078
+ }
1079
+ if (p >= end) {
1080
+ break;
1081
+ }
1082
+
1083
+ /* Find key */
1084
+ const char *key_start = p;
1085
+ while (p < end && *p != '=' && *p != ',') {
1086
+ p++;
1087
+ }
1088
+ const char *key_end = p;
1089
+ size_t key_len = key_end - key_start;
1090
+
1091
+ /* Determine type via schema lookup BEFORE creating Python objects */
1092
+ AttrType type = ATTR_STRING;
1093
+ if (parsers != NULL) {
1094
+ for (size_t i = 0; i < num_parsers; i++) {
1095
+ if (buffer_matches_key(key_start, key_len, parsers[i].name)) {
1096
+ type = parsers[i].type;
1097
+ break;
1098
+ }
1099
+ }
1100
+ }
1101
+
1102
+ /* Create normalized Python key only once, after schema lookup */
1103
+ PyObject *py_key = create_normalized_key(key_start, key_len);
1104
+ if (py_key == NULL) {
1105
+ Py_DECREF(attrs);
1106
+ return NULL;
1107
+ }
1108
+
1109
+ PyObject *py_val = NULL;
1110
+
1111
+ if (p < end && *p == '=') {
1112
+ p++; /* Skip '=' */
1113
+
1114
+ if (p < end && (*p == '"' || *p == '\'')) {
1115
+ /* Quoted value */
1116
+ char quote = *p++;
1117
+ const char *full_start = p - 1; /* include opening quote */
1118
+ const char *val_start = p; /* inside quotes */
1119
+ while (p < end && *p != quote) {
1120
+ p++;
1121
+ }
1122
+ const char *val_end = p; /* points at closing quote or end */
1123
+ int has_closing_quote = (p < end && *p == quote);
1124
+ Py_ssize_t val_len = val_end - val_start;
1125
+ if (has_closing_quote) {
1126
+ p++; /* Skip closing quote */
1127
+ }
1128
+
1129
+ /*
1130
+ * Python parity:
1131
+ * - Known "quoted string" attributes use remove_quotes() => no quotes
1132
+ * - Unknown attributes keep the original token (including quotes)
1133
+ */
1134
+ if (type == ATTR_QUOTED_STRING) {
1135
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1136
+ } else if (type == ATTR_STRING) {
1137
+ Py_ssize_t full_len = has_closing_quote
1138
+ ? (Py_ssize_t)((val_end - full_start) + 1)
1139
+ : (Py_ssize_t)(val_end - full_start);
1140
+ py_val = PyUnicode_FromStringAndSize(full_start, full_len);
1141
+ } else if (type == ATTR_INT || type == ATTR_BANDWIDTH) {
1142
+ /* Numeric inside quotes - parse directly */
1143
+ char num_buf[64];
1144
+ if (val_len < (Py_ssize_t)sizeof(num_buf)) {
1145
+ memcpy(num_buf, val_start, val_len);
1146
+ num_buf[val_len] = '\0';
1147
+ if (type == ATTR_BANDWIDTH) {
1148
+ double v = PyOS_string_to_double(num_buf, NULL, NULL);
1149
+ if (v == -1.0 && PyErr_Occurred()) {
1150
+ PyErr_Clear();
1151
+ } else {
1152
+ py_val = PyLong_FromDouble(v);
1153
+ }
1154
+ } else {
1155
+ py_val = PyLong_FromString(num_buf, NULL, 10);
1156
+ if (py_val == NULL) {
1157
+ PyErr_Clear();
1158
+ }
1159
+ }
1160
+ }
1161
+ /* Fallback to string if conversion fails */
1162
+ if (py_val == NULL) {
1163
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1164
+ }
1165
+ } else if (type == ATTR_FLOAT) {
1166
+ char num_buf[64];
1167
+ if (val_len < (Py_ssize_t)sizeof(num_buf)) {
1168
+ memcpy(num_buf, val_start, val_len);
1169
+ num_buf[val_len] = '\0';
1170
+ double v = PyOS_string_to_double(num_buf, NULL, NULL);
1171
+ if (v == -1.0 && PyErr_Occurred()) {
1172
+ PyErr_Clear();
1173
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1174
+ } else {
1175
+ py_val = PyFloat_FromDouble(v);
1176
+ }
1177
+ } else {
1178
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1179
+ }
1180
+ } else {
1181
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1182
+ }
1183
+ } else {
1184
+ /* Unquoted value */
1185
+ const char *val_start = p;
1186
+ while (p < end && *p != ',') {
1187
+ p++;
1188
+ }
1189
+ /* Strip trailing whitespace */
1190
+ const char *val_end = p;
1191
+ while (val_end > val_start && ascii_isspace((unsigned char)*(val_end - 1))) {
1192
+ val_end--;
1193
+ }
1194
+ Py_ssize_t val_len = val_end - val_start;
1195
+
1196
+ /* Direct type conversion - no intermediate Python string! */
1197
+ if (type == ATTR_INT) {
1198
+ char num_buf[64];
1199
+ if (val_len < (Py_ssize_t)sizeof(num_buf)) {
1200
+ memcpy(num_buf, val_start, val_len);
1201
+ num_buf[val_len] = '\0';
1202
+ py_val = PyLong_FromString(num_buf, NULL, 10);
1203
+ if (py_val == NULL) {
1204
+ PyErr_Clear();
1205
+ }
1206
+ }
1207
+ if (py_val == NULL) {
1208
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1209
+ }
1210
+ } else if (type == ATTR_BANDWIDTH) {
1211
+ char num_buf[64];
1212
+ if (val_len < (Py_ssize_t)sizeof(num_buf)) {
1213
+ memcpy(num_buf, val_start, val_len);
1214
+ num_buf[val_len] = '\0';
1215
+ double v = PyOS_string_to_double(num_buf, NULL, NULL);
1216
+ if (v == -1.0 && PyErr_Occurred()) {
1217
+ PyErr_Clear();
1218
+ } else {
1219
+ py_val = PyLong_FromDouble(v);
1220
+ }
1221
+ }
1222
+ if (py_val == NULL) {
1223
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1224
+ }
1225
+ } else if (type == ATTR_FLOAT) {
1226
+ char num_buf[64];
1227
+ if (val_len < (Py_ssize_t)sizeof(num_buf)) {
1228
+ memcpy(num_buf, val_start, val_len);
1229
+ num_buf[val_len] = '\0';
1230
+ double v = PyOS_string_to_double(num_buf, NULL, NULL);
1231
+ if (v == -1.0 && PyErr_Occurred()) {
1232
+ PyErr_Clear();
1233
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1234
+ } else {
1235
+ py_val = PyFloat_FromDouble(v);
1236
+ }
1237
+ } else {
1238
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1239
+ }
1240
+ } else {
1241
+ /* ATTR_STRING or ATTR_QUOTED_STRING (unquoted case) */
1242
+ py_val = PyUnicode_FromStringAndSize(val_start, val_len);
1243
+ }
1244
+ }
1245
+ } else {
1246
+ /* Key without value - store key content as value with empty key */
1247
+ Py_ssize_t key_len = key_end - key_start;
1248
+ while (key_len > 0 && ascii_isspace((unsigned char)key_start[key_len - 1])) {
1249
+ key_len--;
1250
+ }
1251
+ py_val = PyUnicode_FromStringAndSize(key_start, key_len);
1252
+ Py_DECREF(py_key);
1253
+ py_key = PyUnicode_FromString("");
1254
+ if (py_key == NULL) {
1255
+ Py_XDECREF(py_val);
1256
+ Py_DECREF(attrs);
1257
+ return NULL;
1258
+ }
1259
+ }
1260
+
1261
+ if (py_val == NULL) {
1262
+ Py_DECREF(py_key);
1263
+ Py_DECREF(attrs);
1264
+ return NULL;
1265
+ }
1266
+
1267
+ if (PyDict_SetItem(attrs, py_key, py_val) < 0) {
1268
+ Py_DECREF(py_key);
1269
+ Py_DECREF(py_val);
1270
+ Py_DECREF(attrs);
1271
+ return NULL;
1272
+ }
1273
+
1274
+ Py_DECREF(py_key);
1275
+ Py_DECREF(py_val);
1276
+ }
1277
+
1278
+ return attrs;
1279
+ }
1280
+
1281
+ /*
1282
+ * Wrapper for parse_attributes_with_schema that handles prefix skipping.
1283
+ * This maintains backward compatibility with existing callers.
1284
+ */
1285
+ static PyObject *parse_typed_attribute_list(const char *line, const char *prefix,
1286
+ const AttrParser *parsers, size_t num_parsers) {
1287
+ /* Skip prefix if present */
1288
+ const char *content = line;
1289
+ if (prefix != NULL) {
1290
+ size_t prefix_len = strlen(prefix);
1291
+ if (strncmp(line, prefix, prefix_len) == 0) {
1292
+ content = line + prefix_len;
1293
+ if (*content == ':') {
1294
+ content++;
1295
+ }
1296
+ } else {
1297
+ /* Prefix not found - return empty dict */
1298
+ return PyDict_New();
1299
+ }
1300
+ }
1301
+
1302
+ /* Delegate to schema-aware parser */
1303
+ return parse_attributes_with_schema(content, content + strlen(content),
1304
+ parsers, num_parsers);
1305
+ }
1306
+
1307
+ /* Stream info attribute parsers */
1308
+ static const AttrParser stream_inf_parsers[] = {
1309
+ {"codecs", ATTR_QUOTED_STRING},
1310
+ {"audio", ATTR_QUOTED_STRING},
1311
+ {"video", ATTR_QUOTED_STRING},
1312
+ {"video_range", ATTR_QUOTED_STRING},
1313
+ {"subtitles", ATTR_QUOTED_STRING},
1314
+ {"pathway_id", ATTR_QUOTED_STRING},
1315
+ {"stable_variant_id", ATTR_QUOTED_STRING},
1316
+ {"program_id", ATTR_INT},
1317
+ {"bandwidth", ATTR_BANDWIDTH},
1318
+ {"average_bandwidth", ATTR_INT},
1319
+ {"frame_rate", ATTR_FLOAT},
1320
+ {"hdcp_level", ATTR_STRING},
1321
+ };
1322
+ #define NUM_STREAM_INF_PARSERS (sizeof(stream_inf_parsers) / sizeof(stream_inf_parsers[0]))
1323
+
1324
+ /* Media attribute parsers */
1325
+ static const AttrParser media_parsers[] = {
1326
+ {"uri", ATTR_QUOTED_STRING},
1327
+ {"group_id", ATTR_QUOTED_STRING},
1328
+ {"language", ATTR_QUOTED_STRING},
1329
+ {"assoc_language", ATTR_QUOTED_STRING},
1330
+ {"name", ATTR_QUOTED_STRING},
1331
+ {"instream_id", ATTR_QUOTED_STRING},
1332
+ {"characteristics", ATTR_QUOTED_STRING},
1333
+ {"channels", ATTR_QUOTED_STRING},
1334
+ {"stable_rendition_id", ATTR_QUOTED_STRING},
1335
+ {"thumbnails", ATTR_QUOTED_STRING},
1336
+ {"image", ATTR_QUOTED_STRING},
1337
+ };
1338
+ #define NUM_MEDIA_PARSERS (sizeof(media_parsers) / sizeof(media_parsers[0]))
1339
+
1340
+ /* Part attribute parsers */
1341
+ static const AttrParser part_parsers[] = {
1342
+ {"uri", ATTR_QUOTED_STRING},
1343
+ {"duration", ATTR_FLOAT},
1344
+ {"independent", ATTR_STRING},
1345
+ {"gap", ATTR_STRING},
1346
+ {"byterange", ATTR_STRING},
1347
+ };
1348
+ #define NUM_PART_PARSERS (sizeof(part_parsers) / sizeof(part_parsers[0]))
1349
+
1350
+ /* Rendition report parsers */
1351
+ static const AttrParser rendition_report_parsers[] = {
1352
+ {"uri", ATTR_QUOTED_STRING},
1353
+ {"last_msn", ATTR_INT},
1354
+ {"last_part", ATTR_INT},
1355
+ };
1356
+ #define NUM_RENDITION_REPORT_PARSERS (sizeof(rendition_report_parsers) / sizeof(rendition_report_parsers[0]))
1357
+
1358
+ /* Skip parsers */
1359
+ static const AttrParser skip_parsers[] = {
1360
+ {"recently_removed_dateranges", ATTR_QUOTED_STRING},
1361
+ {"skipped_segments", ATTR_INT},
1362
+ };
1363
+ #define NUM_SKIP_PARSERS (sizeof(skip_parsers) / sizeof(skip_parsers[0]))
1364
+
1365
+ /* Server control parsers */
1366
+ static const AttrParser server_control_parsers[] = {
1367
+ {"can_block_reload", ATTR_STRING},
1368
+ {"hold_back", ATTR_FLOAT},
1369
+ {"part_hold_back", ATTR_FLOAT},
1370
+ {"can_skip_until", ATTR_FLOAT},
1371
+ {"can_skip_dateranges", ATTR_STRING},
1372
+ };
1373
+ #define NUM_SERVER_CONTROL_PARSERS (sizeof(server_control_parsers) / sizeof(server_control_parsers[0]))
1374
+
1375
+ /* Part inf parsers */
1376
+ static const AttrParser part_inf_parsers[] = {
1377
+ {"part_target", ATTR_FLOAT},
1378
+ };
1379
+ #define NUM_PART_INF_PARSERS (sizeof(part_inf_parsers) / sizeof(part_inf_parsers[0]))
1380
+
1381
+ /* Preload hint parsers */
1382
+ static const AttrParser preload_hint_parsers[] = {
1383
+ {"uri", ATTR_QUOTED_STRING},
1384
+ {"type", ATTR_STRING},
1385
+ {"byterange_start", ATTR_INT},
1386
+ {"byterange_length", ATTR_INT},
1387
+ };
1388
+ #define NUM_PRELOAD_HINT_PARSERS (sizeof(preload_hint_parsers) / sizeof(preload_hint_parsers[0]))
1389
+
1390
+ /* Daterange parsers */
1391
+ static const AttrParser daterange_parsers[] = {
1392
+ {"id", ATTR_QUOTED_STRING},
1393
+ {"class", ATTR_QUOTED_STRING},
1394
+ {"start_date", ATTR_QUOTED_STRING},
1395
+ {"end_date", ATTR_QUOTED_STRING},
1396
+ {"duration", ATTR_FLOAT},
1397
+ {"planned_duration", ATTR_FLOAT},
1398
+ {"end_on_next", ATTR_STRING},
1399
+ {"scte35_cmd", ATTR_STRING},
1400
+ {"scte35_out", ATTR_STRING},
1401
+ {"scte35_in", ATTR_STRING},
1402
+ };
1403
+ #define NUM_DATERANGE_PARSERS (sizeof(daterange_parsers) / sizeof(daterange_parsers[0]))
1404
+
1405
+ /* Session data parsers */
1406
+ static const AttrParser session_data_parsers[] = {
1407
+ {"data_id", ATTR_QUOTED_STRING},
1408
+ {"value", ATTR_QUOTED_STRING},
1409
+ {"uri", ATTR_QUOTED_STRING},
1410
+ {"language", ATTR_QUOTED_STRING},
1411
+ };
1412
+ #define NUM_SESSION_DATA_PARSERS (sizeof(session_data_parsers) / sizeof(session_data_parsers[0]))
1413
+
1414
+ /* Content steering parsers */
1415
+ static const AttrParser content_steering_parsers[] = {
1416
+ {"server_uri", ATTR_QUOTED_STRING},
1417
+ {"pathway_id", ATTR_QUOTED_STRING},
1418
+ };
1419
+ #define NUM_CONTENT_STEERING_PARSERS (sizeof(content_steering_parsers) / sizeof(content_steering_parsers[0]))
1420
+
1421
+ /* X-MAP parsers */
1422
+ static const AttrParser x_map_parsers[] = {
1423
+ {"uri", ATTR_QUOTED_STRING},
1424
+ {"byterange", ATTR_QUOTED_STRING},
1425
+ };
1426
+ #define NUM_X_MAP_PARSERS (sizeof(x_map_parsers) / sizeof(x_map_parsers[0]))
1427
+
1428
+ /* Start parsers */
1429
+ static const AttrParser start_parsers[] = {
1430
+ {"time_offset", ATTR_FLOAT},
1431
+ };
1432
+ #define NUM_START_PARSERS (sizeof(start_parsers) / sizeof(start_parsers[0]))
1433
+
1434
+ /* Tiles parsers */
1435
+ static const AttrParser tiles_parsers[] = {
1436
+ {"uri", ATTR_QUOTED_STRING},
1437
+ {"resolution", ATTR_STRING},
1438
+ {"layout", ATTR_STRING},
1439
+ {"duration", ATTR_FLOAT},
1440
+ };
1441
+ #define NUM_TILES_PARSERS (sizeof(tiles_parsers) / sizeof(tiles_parsers[0]))
1442
+
1443
+ /* Image stream inf parsers */
1444
+ static const AttrParser image_stream_inf_parsers[] = {
1445
+ {"codecs", ATTR_QUOTED_STRING},
1446
+ {"uri", ATTR_QUOTED_STRING},
1447
+ {"pathway_id", ATTR_QUOTED_STRING},
1448
+ {"stable_variant_id", ATTR_QUOTED_STRING},
1449
+ {"program_id", ATTR_INT},
1450
+ {"bandwidth", ATTR_INT},
1451
+ {"average_bandwidth", ATTR_INT},
1452
+ {"resolution", ATTR_STRING},
1453
+ };
1454
+ #define NUM_IMAGE_STREAM_INF_PARSERS (sizeof(image_stream_inf_parsers) / sizeof(image_stream_inf_parsers[0]))
1455
+
1456
+ /* IFrame stream inf parsers */
1457
+ static const AttrParser iframe_stream_inf_parsers[] = {
1458
+ {"codecs", ATTR_QUOTED_STRING},
1459
+ {"uri", ATTR_QUOTED_STRING},
1460
+ {"pathway_id", ATTR_QUOTED_STRING},
1461
+ {"stable_variant_id", ATTR_QUOTED_STRING},
1462
+ {"program_id", ATTR_INT},
1463
+ {"bandwidth", ATTR_INT},
1464
+ {"average_bandwidth", ATTR_INT},
1465
+ {"hdcp_level", ATTR_STRING},
1466
+ };
1467
+ #define NUM_IFRAME_STREAM_INF_PARSERS (sizeof(iframe_stream_inf_parsers) / sizeof(iframe_stream_inf_parsers[0]))
1468
+
1469
+ /* Cueout cont parsers */
1470
+ static const AttrParser cueout_cont_parsers[] = {
1471
+ {"duration", ATTR_QUOTED_STRING},
1472
+ {"elapsedtime", ATTR_QUOTED_STRING},
1473
+ {"scte35", ATTR_QUOTED_STRING},
1474
+ };
1475
+ #define NUM_CUEOUT_CONT_PARSERS (sizeof(cueout_cont_parsers) / sizeof(cueout_cont_parsers[0]))
1476
+
1477
+ /* Cueout parsers */
1478
+ static const AttrParser cueout_parsers[] = {
1479
+ {"cue", ATTR_QUOTED_STRING},
1480
+ };
1481
+ #define NUM_CUEOUT_PARSERS (sizeof(cueout_parsers) / sizeof(cueout_parsers[0]))
1482
+
1483
+
1484
+ /*
1485
+ * Helper: parse attribute list with quote removal.
1486
+ * Returns new dict with unquoted values, or NULL on error.
1487
+ */
1488
+ static PyObject *
1489
+ parse_attrs_unquoted(const char *line, const char *tag)
1490
+ {
1491
+ PyObject *raw_attrs = parse_attribute_list(line, tag);
1492
+ if (!raw_attrs) return NULL;
1493
+
1494
+ PyObject *result = PyDict_New();
1495
+ if (!result) { Py_DECREF(raw_attrs); return NULL; }
1496
+
1497
+ PyObject *k, *v;
1498
+ Py_ssize_t pos = 0;
1499
+ while (PyDict_Next(raw_attrs, &pos, &k, &v)) {
1500
+ PyObject *unquoted = remove_quotes_py(v);
1501
+ if (unquoted == NULL || PyDict_SetItem(result, k, unquoted) < 0) {
1502
+ Py_XDECREF(unquoted);
1503
+ Py_DECREF(result);
1504
+ Py_DECREF(raw_attrs);
1505
+ return NULL;
1506
+ }
1507
+ Py_DECREF(unquoted);
1508
+ }
1509
+ Py_DECREF(raw_attrs);
1510
+ return result;
1511
+ }
1512
+
1513
+ /* Parse a key tag */
1514
+ static int
1515
+ parse_key(m3u8_state *mod_state, const char *line, PyObject *data, PyObject *state)
1516
+ {
1517
+ PyObject *key = parse_attrs_unquoted(line, EXT_X_KEY);
1518
+ if (!key) return -1;
1519
+
1520
+ /* Set current key in state */
1521
+ if (dict_set_interned(state, mod_state->str_current_key, key) < 0) {
1522
+ Py_DECREF(key);
1523
+ return -1;
1524
+ }
1525
+
1526
+ /* Add to keys list if not already present */
1527
+ PyObject *keys = dict_get_interned(data, mod_state->str_keys);
1528
+ if (keys) {
1529
+ int found = PySequence_Contains(keys, key);
1530
+ if (found < 0) {
1531
+ Py_DECREF(key);
1532
+ return -1;
1533
+ }
1534
+ if (found == 0) {
1535
+ if (PyList_Append(keys, key) < 0) {
1536
+ Py_DECREF(key);
1537
+ return -1;
1538
+ }
1539
+ }
1540
+ }
1541
+
1542
+ Py_DECREF(key);
1543
+ return 0;
1544
+ }
1545
+
1546
+ /*
1547
+ * Parse #EXTINF tag.
1548
+ * Returns 0 on success, -1 on failure with exception set.
1549
+ */
1550
+ static int
1551
+ parse_extinf(m3u8_state *mod_state, const char *line, PyObject *state,
1552
+ int lineno, int strict)
1553
+ {
1554
+ const char *content = line + strlen(EXTINF) + 1; /* Skip "#EXTINF:" */
1555
+
1556
+ /* Find comma separator */
1557
+ const char *comma = strchr(content, ',');
1558
+ double duration;
1559
+ const char *title = "";
1560
+
1561
+ if (comma != NULL) {
1562
+ char duration_str[64];
1563
+ size_t dur_len = comma - content;
1564
+ if (dur_len >= sizeof(duration_str)) {
1565
+ dur_len = sizeof(duration_str) - 1;
1566
+ }
1567
+ memcpy(duration_str, content, dur_len);
1568
+ duration_str[dur_len] = '\0';
1569
+ duration = PyOS_string_to_double(duration_str, NULL, NULL);
1570
+ if (duration == -1.0 && PyErr_Occurred()) {
1571
+ PyErr_Clear();
1572
+ duration = 0.0;
1573
+ }
1574
+ title = comma + 1;
1575
+ } else {
1576
+ if (strict) {
1577
+ raise_parse_error(mod_state, lineno, line);
1578
+ return -1;
1579
+ }
1580
+ duration = PyOS_string_to_double(content, NULL, NULL);
1581
+ if (duration == -1.0 && PyErr_Occurred()) {
1582
+ PyErr_Clear();
1583
+ duration = 0.0;
1584
+ }
1585
+ }
1586
+
1587
+ /* Get or create segment dict in state using interned string */
1588
+ PyObject *segment = get_or_create_segment(mod_state, state);
1589
+ if (segment == NULL) {
1590
+ return -1;
1591
+ }
1592
+
1593
+ /* Set duration using interned key */
1594
+ PyObject *py_duration = PyFloat_FromDouble(duration);
1595
+ if (py_duration == NULL) {
1596
+ return -1;
1597
+ }
1598
+ if (dict_set_interned(segment, mod_state->str_duration, py_duration) < 0) {
1599
+ Py_DECREF(py_duration);
1600
+ return -1;
1601
+ }
1602
+ Py_DECREF(py_duration);
1603
+
1604
+ /* Set title using interned key */
1605
+ PyObject *py_title = PyUnicode_FromString(title);
1606
+ if (py_title == NULL) {
1607
+ return -1;
1608
+ }
1609
+ if (dict_set_interned(segment, mod_state->str_title, py_title) < 0) {
1610
+ Py_DECREF(py_title);
1611
+ return -1;
1612
+ }
1613
+ Py_DECREF(py_title);
1614
+
1615
+ /* Set expect_segment flag using interned key */
1616
+ if (dict_set_interned(state, mod_state->str_expect_segment, Py_True) < 0) {
1617
+ return -1;
1618
+ }
1619
+ return 0;
1620
+ }
1621
+
1622
+ /*
1623
+ * Parse a segment URI line.
1624
+ * Returns 0 on success, -1 on failure with exception set.
1625
+ */
1626
+ static int
1627
+ parse_ts_chunk(m3u8_state *mod_state, const char *line,
1628
+ PyObject *data, PyObject *state)
1629
+ {
1630
+ /* Get segment dict from state using interned key, or create new one */
1631
+ PyObject *segment = dict_get_interned(state, mod_state->str_segment);
1632
+ if (segment == NULL) {
1633
+ segment = PyDict_New();
1634
+ if (segment == NULL) {
1635
+ return -1;
1636
+ }
1637
+ } else {
1638
+ Py_INCREF(segment);
1639
+ }
1640
+ /* Remove segment from state (we're taking ownership) */
1641
+ if (PyDict_DelItem(state, mod_state->str_segment) < 0) {
1642
+ if (!PyErr_ExceptionMatches(PyExc_KeyError)) {
1643
+ Py_DECREF(segment);
1644
+ return -1;
1645
+ }
1646
+ PyErr_Clear();
1647
+ }
1648
+
1649
+ /* Add URI using interned key */
1650
+ PyObject *uri = PyUnicode_FromString(line);
1651
+ if (uri == NULL) {
1652
+ Py_DECREF(segment);
1653
+ return -1;
1654
+ }
1655
+ if (dict_set_interned(segment, mod_state->str_uri, uri) < 0) {
1656
+ Py_DECREF(uri);
1657
+ Py_DECREF(segment);
1658
+ return -1;
1659
+ }
1660
+ Py_DECREF(uri);
1661
+
1662
+ /* Transfer state values to segment (borrowed references) */
1663
+ PyObject *pdt = dict_get_interned(state, mod_state->str_program_date_time);
1664
+ if (pdt != NULL) {
1665
+ if (dict_set_interned(segment, mod_state->str_program_date_time, pdt) < 0) {
1666
+ Py_DECREF(segment);
1667
+ return -1;
1668
+ }
1669
+ if (del_item_interned_ignore_keyerror(state, mod_state->str_program_date_time) < 0) {
1670
+ Py_DECREF(segment);
1671
+ return -1;
1672
+ }
1673
+ }
1674
+
1675
+ PyObject *current_pdt = dict_get_interned(state, mod_state->str_current_program_date_time);
1676
+ if (current_pdt != NULL) {
1677
+ if (dict_set_interned(segment, mod_state->str_current_program_date_time, current_pdt) < 0) {
1678
+ Py_DECREF(segment);
1679
+ return -1;
1680
+ }
1681
+ /* Update current_program_date_time by adding duration */
1682
+ PyObject *duration = dict_get_interned(segment, mod_state->str_duration);
1683
+ if (duration != NULL && current_pdt != NULL) {
1684
+ double secs = PyFloat_AsDouble(duration);
1685
+ if (PyErr_Occurred()) {
1686
+ Py_DECREF(segment);
1687
+ return -1;
1688
+ }
1689
+ PyObject *new_pdt = datetime_add_seconds(mod_state, current_pdt, secs);
1690
+ if (new_pdt == NULL) {
1691
+ Py_DECREF(segment);
1692
+ return -1;
1693
+ }
1694
+ if (dict_set_interned(state, mod_state->str_current_program_date_time, new_pdt) < 0) {
1695
+ Py_DECREF(new_pdt);
1696
+ Py_DECREF(segment);
1697
+ return -1;
1698
+ }
1699
+ Py_DECREF(new_pdt);
1700
+ }
1701
+ }
1702
+
1703
+ /* Boolean flags from state - use transfer_state_bool helper */
1704
+ if (transfer_state_bool(state, segment, mod_state->str_cue_in) < 0) {
1705
+ Py_DECREF(segment);
1706
+ return -1;
1707
+ }
1708
+
1709
+ /* cue_out needs special handling: check truthiness and keep state ref */
1710
+ PyObject *cue_out = dict_get_interned(state, mod_state->str_cue_out);
1711
+ int cue_out_truth = cue_out ? PyObject_IsTrue(cue_out) : 0;
1712
+ if (cue_out_truth < 0) {
1713
+ Py_DECREF(segment);
1714
+ return -1;
1715
+ }
1716
+ if (dict_set_interned(segment, mod_state->str_cue_out, cue_out_truth ? Py_True : Py_False) < 0) {
1717
+ Py_DECREF(segment);
1718
+ return -1;
1719
+ }
1720
+
1721
+ if (transfer_state_bool(state, segment, mod_state->str_cue_out_start) < 0 ||
1722
+ transfer_state_bool(state, segment, mod_state->str_cue_out_explicitly_duration) < 0) {
1723
+ Py_DECREF(segment);
1724
+ return -1;
1725
+ }
1726
+
1727
+ /* SCTE35 values - get if cue_out, pop otherwise */
1728
+ PyObject *scte_keys[] = {
1729
+ mod_state->str_current_cue_out_scte35,
1730
+ mod_state->str_current_cue_out_oatcls_scte35,
1731
+ mod_state->str_current_cue_out_duration,
1732
+ mod_state->str_current_cue_out_elapsedtime,
1733
+ mod_state->str_asset_metadata,
1734
+ };
1735
+ PyObject *seg_keys[] = {
1736
+ mod_state->str_scte35,
1737
+ mod_state->str_oatcls_scte35,
1738
+ mod_state->str_scte35_duration,
1739
+ mod_state->str_scte35_elapsedtime,
1740
+ mod_state->str_asset_metadata,
1741
+ };
1742
+
1743
+ for (int i = 0; i < 5; i++) {
1744
+ PyObject *val = dict_get_interned(state, scte_keys[i]);
1745
+ if (val) {
1746
+ if (dict_set_interned(segment, seg_keys[i], val) < 0) {
1747
+ Py_DECREF(segment);
1748
+ return -1;
1749
+ }
1750
+ if (!cue_out_truth) {
1751
+ if (del_item_interned_ignore_keyerror(state, scte_keys[i]) < 0) {
1752
+ Py_DECREF(segment);
1753
+ return -1;
1754
+ }
1755
+ }
1756
+ } else {
1757
+ /* Clear any potential error from GetItem (though unlikely) */
1758
+ PyErr_Clear();
1759
+ if (dict_set_interned(segment, seg_keys[i], Py_None) < 0) {
1760
+ Py_DECREF(segment);
1761
+ return -1;
1762
+ }
1763
+ }
1764
+ }
1765
+
1766
+ if (del_item_interned_ignore_keyerror(state, mod_state->str_cue_out) < 0) {
1767
+ Py_DECREF(segment);
1768
+ return -1;
1769
+ }
1770
+
1771
+ /* Discontinuity */
1772
+ if (transfer_state_bool(state, segment, mod_state->str_discontinuity) < 0) {
1773
+ Py_DECREF(segment);
1774
+ return -1;
1775
+ }
1776
+
1777
+ /* Key - use interned string for current_key lookup */
1778
+ PyObject *current_key = dict_get_interned(state, mod_state->str_current_key);
1779
+ if (current_key) {
1780
+ if (dict_set_interned(segment, mod_state->str_key, current_key) < 0) {
1781
+ Py_DECREF(segment);
1782
+ return -1;
1783
+ }
1784
+ } else {
1785
+ /* For unencrypted segments, ensure None is in keys list */
1786
+ PyObject *keys = dict_get_interned(data, mod_state->str_keys);
1787
+ if (keys) {
1788
+ int has_none = 0;
1789
+ Py_ssize_t n = PyList_Size(keys);
1790
+ for (Py_ssize_t i = 0; i < n; i++) {
1791
+ if (PyList_GetItem(keys, i) == Py_None) {
1792
+ has_none = 1;
1793
+ break;
1794
+ }
1795
+ }
1796
+ if (!has_none) {
1797
+ PyList_Append(keys, Py_None);
1798
+ }
1799
+ }
1800
+ }
1801
+
1802
+ /* Init section */
1803
+ PyObject *current_segment_map = dict_get_interned(state, mod_state->str_current_segment_map);
1804
+ /* Only set init_section if the map dict is non-empty (matches Python's truthiness check) */
1805
+ if (current_segment_map && PyDict_Size(current_segment_map) > 0) {
1806
+ if (dict_set_interned(segment, mod_state->str_init_section, current_segment_map) < 0) {
1807
+ Py_DECREF(segment);
1808
+ return -1;
1809
+ }
1810
+ }
1811
+
1812
+ /* Dateranges and Blackout - transfer value or None */
1813
+ if (transfer_state_value(state, segment, mod_state->str_dateranges) < 0 ||
1814
+ transfer_state_value(state, segment, mod_state->str_blackout) < 0) {
1815
+ Py_DECREF(segment);
1816
+ return -1;
1817
+ }
1818
+
1819
+ /* Gap - special: read str_gap, write to str_gap_tag as True/None */
1820
+ PyObject *gap = dict_get_interned(state, mod_state->str_gap);
1821
+ if (dict_set_interned(segment, mod_state->str_gap_tag, gap ? Py_True : Py_None) < 0) {
1822
+ Py_DECREF(segment);
1823
+ return -1;
1824
+ }
1825
+ if (gap && del_item_interned_ignore_keyerror(state, mod_state->str_gap) < 0) {
1826
+ Py_DECREF(segment);
1827
+ return -1;
1828
+ }
1829
+
1830
+ /* Add to segments list using interned key */
1831
+ PyObject *segments = dict_get_interned(data, mod_state->str_segments);
1832
+ if (segments) {
1833
+ if (PyList_Append(segments, segment) < 0) {
1834
+ Py_DECREF(segment);
1835
+ return -1;
1836
+ }
1837
+ }
1838
+
1839
+ /* Clear expect_segment flag using interned key */
1840
+ if (dict_set_interned(state, mod_state->str_expect_segment, Py_False) < 0) {
1841
+ Py_DECREF(segment);
1842
+ return -1;
1843
+ }
1844
+ Py_DECREF(segment);
1845
+ return 0;
1846
+ }
1847
+
1848
+ /* Parse variant playlist - uses interned strings throughout */
1849
+ static int parse_variant_playlist(m3u8_state *ms, const char *line,
1850
+ PyObject *data, PyObject *state) {
1851
+ PyObject *stream_info = dict_get_interned(state, ms->str_stream_info);
1852
+ if (!stream_info) {
1853
+ stream_info = PyDict_New();
1854
+ if (!stream_info) return -1;
1855
+ } else {
1856
+ Py_INCREF(stream_info);
1857
+ }
1858
+ if (del_item_interned_ignore_keyerror(state, ms->str_stream_info) < 0) {
1859
+ Py_DECREF(stream_info);
1860
+ return -1;
1861
+ }
1862
+
1863
+ PyObject *playlist = PyDict_New();
1864
+ if (!playlist) {
1865
+ Py_DECREF(stream_info);
1866
+ return -1;
1867
+ }
1868
+
1869
+ PyObject *uri = PyUnicode_FromString(line);
1870
+ if (!uri) {
1871
+ Py_DECREF(playlist);
1872
+ Py_DECREF(stream_info);
1873
+ return -1;
1874
+ }
1875
+ if (dict_set_interned(playlist, ms->str_uri, uri) < 0) {
1876
+ Py_DECREF(uri);
1877
+ Py_DECREF(playlist);
1878
+ Py_DECREF(stream_info);
1879
+ return -1;
1880
+ }
1881
+ Py_DECREF(uri);
1882
+
1883
+ if (dict_set_interned(playlist, ms->str_stream_info, stream_info) < 0) {
1884
+ Py_DECREF(playlist);
1885
+ Py_DECREF(stream_info);
1886
+ return -1;
1887
+ }
1888
+ Py_DECREF(stream_info);
1889
+
1890
+ PyObject *playlists = dict_get_interned(data, ms->str_playlists);
1891
+ if (playlists && PyList_Append(playlists, playlist) < 0) {
1892
+ Py_DECREF(playlist);
1893
+ return -1;
1894
+ }
1895
+ Py_DECREF(playlist);
1896
+
1897
+ return dict_set_interned(state, ms->str_expect_playlist, Py_False);
1898
+ }
1899
+
1900
+ /*
1901
+ * Parse EXT-X-PROGRAM-DATE-TIME tag - uses interned strings.
1902
+ * Returns 0 on success, -1 on failure with exception set.
1903
+ */
1904
+ static int
1905
+ parse_program_date_time(m3u8_state *ms, const char *line,
1906
+ PyObject *data, PyObject *state)
1907
+ {
1908
+ const char *value = strchr(line, ':');
1909
+ if (value == NULL) return 0;
1910
+ value++;
1911
+
1912
+ PyObject *dt = PyObject_CallFunction(ms->fromisoformat_meth, "s", value);
1913
+ if (dt == NULL) return -1;
1914
+
1915
+ /* Set in data if not already set */
1916
+ PyObject *existing = dict_get_interned(data, ms->str_program_date_time);
1917
+ if (existing == NULL || existing == Py_None) {
1918
+ if (dict_set_interned(data, ms->str_program_date_time, dt) < 0) {
1919
+ Py_DECREF(dt);
1920
+ return -1;
1921
+ }
1922
+ }
1923
+
1924
+ if (dict_set_interned(state, ms->str_current_program_date_time, dt) < 0 ||
1925
+ dict_set_interned(state, ms->str_program_date_time, dt) < 0) {
1926
+ Py_DECREF(dt);
1927
+ return -1;
1928
+ }
1929
+ Py_DECREF(dt);
1930
+ return 0;
1931
+ }
1932
+
1933
+ /*
1934
+ * Parse EXT-X-PART tag - uses interned strings throughout.
1935
+ * Returns 0 on success, -1 on failure with exception set.
1936
+ */
1937
+ static int
1938
+ parse_part(m3u8_state *ms, const char *line, PyObject *state)
1939
+ {
1940
+ PyObject *part = parse_typed_attribute_list(line, EXT_X_PART,
1941
+ part_parsers, NUM_PART_PARSERS);
1942
+ if (part == NULL) return -1;
1943
+
1944
+ /* Add program_date_time if available */
1945
+ PyObject *current_pdt = dict_get_interned(state, ms->str_current_program_date_time);
1946
+ if (current_pdt != NULL) {
1947
+ if (dict_set_interned(part, ms->str_program_date_time, current_pdt) < 0) {
1948
+ Py_DECREF(part);
1949
+ return -1;
1950
+ }
1951
+ /* Update current_program_date_time */
1952
+ PyObject *duration = dict_get_interned(part, ms->str_duration);
1953
+ if (duration != NULL) {
1954
+ double secs = PyFloat_AsDouble(duration);
1955
+ if (PyErr_Occurred()) {
1956
+ Py_DECREF(part);
1957
+ return -1;
1958
+ }
1959
+ PyObject *new_pdt = datetime_add_seconds(ms, current_pdt, secs);
1960
+ if (new_pdt == NULL) {
1961
+ Py_DECREF(part);
1962
+ return -1;
1963
+ }
1964
+ if (dict_set_interned(state, ms->str_current_program_date_time, new_pdt) < 0) {
1965
+ Py_DECREF(new_pdt);
1966
+ Py_DECREF(part);
1967
+ return -1;
1968
+ }
1969
+ Py_DECREF(new_pdt);
1970
+ }
1971
+ }
1972
+
1973
+ /* Add dateranges - use transfer_state_value pattern */
1974
+ if (transfer_state_value(state, part, ms->str_dateranges) < 0) {
1975
+ Py_DECREF(part);
1976
+ return -1;
1977
+ }
1978
+
1979
+ /* Add gap_tag - read from str_gap, write True/None to str_gap_tag */
1980
+ PyObject *gap = dict_get_interned(state, ms->str_gap);
1981
+ if (dict_set_interned(part, ms->str_gap_tag, gap ? Py_True : Py_None) < 0) {
1982
+ Py_DECREF(part);
1983
+ return -1;
1984
+ }
1985
+ if (gap && del_item_interned_ignore_keyerror(state, ms->str_gap) < 0) {
1986
+ Py_DECREF(part);
1987
+ return -1;
1988
+ }
1989
+
1990
+ /* Get or create segment */
1991
+ PyObject *segment = dict_get_interned(state, ms->str_segment);
1992
+ if (segment == NULL) {
1993
+ segment = PyDict_New();
1994
+ if (segment == NULL) {
1995
+ Py_DECREF(part);
1996
+ return -1;
1997
+ }
1998
+ if (dict_set_interned(state, ms->str_segment, segment) < 0) {
1999
+ Py_DECREF(segment);
2000
+ Py_DECREF(part);
2001
+ return -1;
2002
+ }
2003
+ Py_DECREF(segment);
2004
+ segment = dict_get_interned(state, ms->str_segment);
2005
+ }
2006
+
2007
+ /* Get or create parts list in segment */
2008
+ PyObject *parts = dict_get_interned(segment, ms->str_parts);
2009
+ if (parts == NULL) {
2010
+ parts = PyList_New(0);
2011
+ if (parts == NULL) {
2012
+ Py_DECREF(part);
2013
+ return -1;
2014
+ }
2015
+ if (dict_set_interned(segment, ms->str_parts, parts) < 0) {
2016
+ Py_DECREF(parts);
2017
+ Py_DECREF(part);
2018
+ return -1;
2019
+ }
2020
+ Py_DECREF(parts);
2021
+ parts = dict_get_interned(segment, ms->str_parts);
2022
+ }
2023
+
2024
+ if (PyList_Append(parts, part) < 0) {
2025
+ Py_DECREF(part);
2026
+ return -1;
2027
+ }
2028
+ Py_DECREF(part);
2029
+ return 0;
2030
+ }
2031
+
2032
+ /* Parse cue out - uses interned strings for state dict */
2033
+ static int parse_cueout(m3u8_state *ms, const char *line, PyObject *state) {
2034
+ if (dict_set_interned(state, ms->str_cue_out_start, Py_True) < 0 ||
2035
+ dict_set_interned(state, ms->str_cue_out, Py_True) < 0) {
2036
+ return -1;
2037
+ }
2038
+
2039
+ /* Check for DURATION keyword */
2040
+ char upper_line[1024];
2041
+ size_t i;
2042
+ for (i = 0; i < sizeof(upper_line) - 1 && line[i]; i++) {
2043
+ upper_line[i] = toupper((unsigned char)line[i]);
2044
+ }
2045
+ upper_line[i] = '\0';
2046
+
2047
+ if (strstr(upper_line, "DURATION")) {
2048
+ if (dict_set_interned(state, ms->str_cue_out_explicitly_duration, Py_True) < 0) {
2049
+ return -1;
2050
+ }
2051
+ }
2052
+
2053
+ /* Parse attributes if present */
2054
+ const char *colon = strchr(line, ':');
2055
+ if (!colon || *(colon + 1) == '\0') {
2056
+ return 0;
2057
+ }
2058
+
2059
+ PyObject *cue_info = parse_typed_attribute_list(line, EXT_X_CUE_OUT,
2060
+ cueout_parsers, NUM_CUEOUT_PARSERS);
2061
+ if (!cue_info) return -1;
2062
+
2063
+ /* cue_info uses attr keys like "cue", "duration", "" - not interned */
2064
+ PyObject *cue_out_scte35 = PyDict_GetItemString(cue_info, "cue");
2065
+ PyObject *cue_out_duration = PyDict_GetItemString(cue_info, "duration");
2066
+ if (!cue_out_duration) {
2067
+ cue_out_duration = PyDict_GetItemString(cue_info, "");
2068
+ }
2069
+
2070
+ /* State dict uses interned keys */
2071
+ if (cue_out_scte35) {
2072
+ if (dict_set_interned(state, ms->str_current_cue_out_scte35, cue_out_scte35) < 0) {
2073
+ Py_DECREF(cue_info);
2074
+ return -1;
2075
+ }
2076
+ }
2077
+ if (cue_out_duration) {
2078
+ if (dict_set_interned(state, ms->str_current_cue_out_duration, cue_out_duration) < 0) {
2079
+ Py_DECREF(cue_info);
2080
+ return -1;
2081
+ }
2082
+ }
2083
+
2084
+ Py_DECREF(cue_info);
2085
+ return 0;
2086
+ }
2087
+
2088
+ /* Parse cue out cont - uses interned strings for state dict */
2089
+ static int parse_cueout_cont(m3u8_state *ms, const char *line, PyObject *state) {
2090
+ if (dict_set_interned(state, ms->str_cue_out, Py_True) < 0) return -1;
2091
+
2092
+ const char *colon = strchr(line, ':');
2093
+ if (!colon || *(colon + 1) == '\0') return 0;
2094
+
2095
+ PyObject *cue_info = parse_typed_attribute_list(line, EXT_X_CUE_OUT_CONT,
2096
+ cueout_cont_parsers, NUM_CUEOUT_CONT_PARSERS);
2097
+ if (!cue_info) return -1;
2098
+
2099
+ /* cue_info uses attr keys like "", "duration", etc. - not interned */
2100
+ PyObject *progress = PyDict_GetItemString(cue_info, "");
2101
+ if (progress) {
2102
+ if (!PyUnicode_Check(progress)) {
2103
+ PyErr_SetString(PyExc_TypeError, "expected str for cue-out progress");
2104
+ Py_DECREF(cue_info);
2105
+ return -1;
2106
+ }
2107
+ Py_ssize_t n = PyUnicode_GetLength(progress);
2108
+ Py_ssize_t slash = PyUnicode_FindChar(progress, '/', 0, n, 1);
2109
+ if (slash >= 0) {
2110
+ PyObject *elapsed = PyUnicode_Substring(progress, 0, slash);
2111
+ PyObject *duration = PyUnicode_Substring(progress, slash + 1, n);
2112
+ if (elapsed == NULL || duration == NULL) {
2113
+ Py_XDECREF(elapsed);
2114
+ Py_XDECREF(duration);
2115
+ Py_DECREF(cue_info);
2116
+ return -1;
2117
+ }
2118
+ /* Use interned keys for state dict */
2119
+ int rc1 = dict_set_interned(state, ms->str_current_cue_out_elapsedtime, elapsed);
2120
+ int rc2 = dict_set_interned(state, ms->str_current_cue_out_duration, duration);
2121
+ Py_DECREF(elapsed);
2122
+ Py_DECREF(duration);
2123
+ if (rc1 < 0 || rc2 < 0) {
2124
+ Py_DECREF(cue_info);
2125
+ return -1;
2126
+ }
2127
+ } else {
2128
+ if (dict_set_interned(state, ms->str_current_cue_out_duration, progress) < 0) {
2129
+ Py_DECREF(cue_info);
2130
+ return -1;
2131
+ }
2132
+ }
2133
+ }
2134
+
2135
+ PyObject *duration = PyDict_GetItemString(cue_info, "duration");
2136
+ if (duration && dict_set_interned(state, ms->str_current_cue_out_duration, duration) < 0) {
2137
+ Py_DECREF(cue_info);
2138
+ return -1;
2139
+ }
2140
+
2141
+ PyObject *scte35 = PyDict_GetItemString(cue_info, "scte35");
2142
+ if (scte35 && dict_set_interned(state, ms->str_current_cue_out_scte35, scte35) < 0) {
2143
+ Py_DECREF(cue_info);
2144
+ return -1;
2145
+ }
2146
+
2147
+ PyObject *elapsedtime = PyDict_GetItemString(cue_info, "elapsedtime");
2148
+ if (elapsedtime && dict_set_interned(state, ms->str_current_cue_out_elapsedtime, elapsedtime) < 0) {
2149
+ Py_DECREF(cue_info);
2150
+ return -1;
2151
+ }
2152
+
2153
+ Py_DECREF(cue_info);
2154
+ return 0;
2155
+ }
2156
+
2157
+ /*
2158
+ * ============================================================================
2159
+ * Dispatch Table Handlers
2160
+ *
2161
+ * These wrapper functions provide a unified signature for the dispatch table.
2162
+ * Each handler receives a ParseContext and the full line, extracts what it
2163
+ * needs, and calls the appropriate parsing logic.
2164
+ * ============================================================================
2165
+ */
2166
+
2167
+ /*
2168
+ * Macro-generated integer value handlers.
2169
+ * These handlers parse an integer value after the tag and store it.
2170
+ */
2171
+ #define MAKE_INT_HANDLER(name, tag, field) \
2172
+ static int name(ParseContext *ctx, const char *line) { \
2173
+ const char *value = line + sizeof(tag); \
2174
+ PyObject *py_value = PyLong_FromString(value, NULL, 10); \
2175
+ if (py_value == NULL) { PyErr_Clear(); return 0; } \
2176
+ int rc = dict_set_interned(ctx->data, ctx->mod_state->field, py_value); \
2177
+ Py_DECREF(py_value); \
2178
+ return rc < 0 ? -1 : 0; \
2179
+ }
2180
+
2181
+ MAKE_INT_HANDLER(handle_targetduration, EXT_X_TARGETDURATION, str_targetduration)
2182
+ MAKE_INT_HANDLER(handle_media_sequence, EXT_X_MEDIA_SEQUENCE, str_media_sequence)
2183
+ MAKE_INT_HANDLER(handle_discontinuity_sequence, EXT_X_DISCONTINUITY_SEQUENCE, str_discontinuity_sequence)
2184
+
2185
+ /*
2186
+ * Macro-generated wrapper handlers that delegate to existing parse functions.
2187
+ */
2188
+ #define MAKE_PARSE_WRAPPER(name, fn) \
2189
+ static int name(ParseContext *ctx, const char *line) { \
2190
+ return fn(ctx->mod_state, line, ctx->data, ctx->state); \
2191
+ }
2192
+
2193
+ MAKE_PARSE_WRAPPER(handle_program_date_time, parse_program_date_time)
2194
+ MAKE_PARSE_WRAPPER(handle_key, parse_key)
2195
+
2196
+ #undef MAKE_PARSE_WRAPPER
2197
+
2198
+ /* Handler for #EXTINF */
2199
+ static int
2200
+ handle_extinf(ParseContext *ctx, const char *line)
2201
+ {
2202
+ int rc = parse_extinf(ctx->mod_state, line, ctx->state, ctx->lineno, ctx->strict);
2203
+ if (rc == 0) {
2204
+ ctx->expect_segment = 1;
2205
+ }
2206
+ return rc;
2207
+ }
2208
+
2209
+ /* Handler for #EXT-X-BYTERANGE */
2210
+ static int
2211
+ handle_byterange(ParseContext *ctx, const char *line)
2212
+ {
2213
+ const char *value = line + sizeof(EXT_X_BYTERANGE);
2214
+ PyObject *segment = get_or_create_segment(ctx->mod_state, ctx->state);
2215
+ if (segment == NULL) {
2216
+ return -1;
2217
+ }
2218
+ PyObject *py_value = PyUnicode_FromString(value);
2219
+ if (py_value == NULL) {
2220
+ return -1;
2221
+ }
2222
+ int rc = dict_set_interned(segment, ctx->mod_state->str_byterange, py_value);
2223
+ Py_DECREF(py_value);
2224
+ if (rc < 0) {
2225
+ return -1;
2226
+ }
2227
+ ctx->expect_segment = 1;
2228
+ return dict_set_interned(ctx->state, ctx->mod_state->str_expect_segment, Py_True);
2229
+ }
2230
+
2231
+ /* Handler for #EXT-X-BITRATE */
2232
+ static int
2233
+ handle_bitrate(ParseContext *ctx, const char *line)
2234
+ {
2235
+ const char *value = line + sizeof(EXT_X_BITRATE);
2236
+ PyObject *segment = get_or_create_segment(ctx->mod_state, ctx->state);
2237
+ if (segment == NULL) {
2238
+ return -1;
2239
+ }
2240
+ PyObject *py_value = PyLong_FromString(value, NULL, 10);
2241
+ if (py_value == NULL) {
2242
+ PyErr_Clear();
2243
+ return 0;
2244
+ }
2245
+ int rc = dict_set_interned(segment, ctx->mod_state->str_bitrate, py_value);
2246
+ Py_DECREF(py_value);
2247
+ return rc < 0 ? -1 : 0;
2248
+ }
2249
+
2250
+ /* Handler for #EXT-X-STREAM-INF */
2251
+ static int
2252
+ handle_stream_inf(ParseContext *ctx, const char *line)
2253
+ {
2254
+ m3u8_state *ms = ctx->mod_state;
2255
+ /* Use shadow state only - synced to dict before custom parser/end of parse */
2256
+ ctx->expect_playlist = 1;
2257
+ if (dict_set_interned(ctx->data, ms->str_is_variant, Py_True) < 0) return -1;
2258
+ if (dict_set_interned(ctx->data, ms->str_media_sequence, Py_None) < 0) return -1;
2259
+
2260
+ PyObject *stream_info = parse_typed_attribute_list(line, EXT_X_STREAM_INF,
2261
+ stream_inf_parsers, NUM_STREAM_INF_PARSERS);
2262
+ if (stream_info == NULL) return -1;
2263
+ int rc = dict_set_interned(ctx->state, ms->str_stream_info, stream_info);
2264
+ Py_DECREF(stream_info);
2265
+ return rc < 0 ? -1 : 0;
2266
+ }
2267
+
2268
+ /*
2269
+ * Helper for iframe/image stream handlers - parses attrs, extracts URI,
2270
+ * builds playlist dict, and appends to list. Uses interned str_uri.
2271
+ */
2272
+ static int
2273
+ handle_stream_inf_with_uri(ParseContext *ctx, const char *line, const char *tag,
2274
+ const AttrParser *parsers, size_t num_parsers,
2275
+ const char *info_key, PyObject *list_key)
2276
+ {
2277
+ m3u8_state *ms = ctx->mod_state;
2278
+ PyObject *info = parse_typed_attribute_list(line, tag, parsers, num_parsers);
2279
+ if (info == NULL) return -1;
2280
+
2281
+ /* Use interned string for URI lookup */
2282
+ PyObject *uri = dict_get_interned(info, ms->str_uri);
2283
+ if (uri == NULL) { Py_DECREF(info); return 0; }
2284
+
2285
+ Py_INCREF(uri);
2286
+ if (del_item_interned_ignore_keyerror(info, ms->str_uri) < 0) {
2287
+ Py_DECREF(uri);
2288
+ Py_DECREF(info);
2289
+ return -1;
2290
+ }
2291
+
2292
+ /* info_key passed as char* - Py_BuildValue "s" is fine for rare keys */
2293
+ PyObject *playlist = Py_BuildValue("{s:N,s:N}", "uri", uri, info_key, info);
2294
+ if (playlist == NULL) return -1;
2295
+
2296
+ PyObject *list = dict_get_interned(ctx->data, list_key);
2297
+ int rc = PyList_Append(list, playlist);
2298
+ Py_DECREF(playlist);
2299
+ return rc;
2300
+ }
2301
+
2302
+ static int handle_i_frame_stream_inf(ParseContext *ctx, const char *line) {
2303
+ return handle_stream_inf_with_uri(ctx, line, EXT_X_I_FRAME_STREAM_INF,
2304
+ iframe_stream_inf_parsers, NUM_IFRAME_STREAM_INF_PARSERS,
2305
+ "iframe_stream_info", ctx->mod_state->str_iframe_playlists);
2306
+ }
2307
+
2308
+ static int handle_image_stream_inf(ParseContext *ctx, const char *line) {
2309
+ return handle_stream_inf_with_uri(ctx, line, EXT_X_IMAGE_STREAM_INF,
2310
+ image_stream_inf_parsers, NUM_IMAGE_STREAM_INF_PARSERS,
2311
+ "image_stream_info", ctx->mod_state->str_image_playlists);
2312
+ }
2313
+
2314
+ /*
2315
+ * Macro-generated handlers that parse typed attributes and append to a list.
2316
+ */
2317
+ #define MAKE_TYPED_ATTR_LIST_HANDLER(name, tag, parsers, num_parsers, field) \
2318
+ static int name(ParseContext *ctx, const char *line) { \
2319
+ PyObject *result = parse_typed_attribute_list(line, tag, parsers, num_parsers); \
2320
+ if (result == NULL) return -1; \
2321
+ PyObject *list = dict_get_interned(ctx->data, ctx->mod_state->field); \
2322
+ int rc = PyList_Append(list, result); \
2323
+ Py_DECREF(result); \
2324
+ return rc; \
2325
+ }
2326
+
2327
+ MAKE_TYPED_ATTR_LIST_HANDLER(handle_media, EXT_X_MEDIA, media_parsers, NUM_MEDIA_PARSERS, str_media)
2328
+
2329
+ /* Handler for #EXT-X-PLAYLIST-TYPE */
2330
+ static int
2331
+ handle_playlist_type(ParseContext *ctx, const char *line)
2332
+ {
2333
+ const char *value = line + sizeof(EXT_X_PLAYLIST_TYPE);
2334
+ /* Use create_normalized_key for safe, DRY normalization (tolower + strip) */
2335
+ PyObject *py_value = create_normalized_key(value, strlen(value));
2336
+ if (py_value == NULL) return -1;
2337
+ int rc = dict_set_interned(ctx->data, ctx->mod_state->str_playlist_type, py_value);
2338
+ Py_DECREF(py_value);
2339
+ return rc < 0 ? -1 : 0;
2340
+ }
2341
+
2342
+ MAKE_INT_HANDLER(handle_version, EXT_X_VERSION, str_version)
2343
+
2344
+ #undef MAKE_INT_HANDLER
2345
+
2346
+ /* Handler for #EXT-X-ALLOW-CACHE */
2347
+ static int
2348
+ handle_allow_cache(ParseContext *ctx, const char *line)
2349
+ {
2350
+ const char *value = line + sizeof(EXT_X_ALLOW_CACHE);
2351
+ char normalized[64];
2352
+ size_t i;
2353
+ for (i = 0; i < sizeof(normalized) - 1 && value[i]; i++) {
2354
+ normalized[i] = ascii_tolower((unsigned char)value[i]);
2355
+ }
2356
+ normalized[i] = '\0';
2357
+ PyObject *py_value = PyUnicode_FromString(strip(normalized));
2358
+ if (py_value == NULL) {
2359
+ return -1;
2360
+ }
2361
+ int rc = dict_set_interned(ctx->data, ctx->mod_state->str_allow_cache, py_value);
2362
+ Py_DECREF(py_value);
2363
+ return rc < 0 ? -1 : 0;
2364
+ }
2365
+
2366
+ /*
2367
+ * Macro-generated flag handlers.
2368
+ * These handlers just set a boolean flag in either data or state dict.
2369
+ */
2370
+ #define MAKE_DATA_FLAG_HANDLER(name, field) \
2371
+ static int name(ParseContext *ctx, const char *line) { \
2372
+ (void)line; \
2373
+ return dict_set_interned(ctx->data, ctx->mod_state->field, Py_True); \
2374
+ }
2375
+
2376
+ #define MAKE_STATE_FLAG_HANDLER(name, field) \
2377
+ static int name(ParseContext *ctx, const char *line) { \
2378
+ (void)line; \
2379
+ return dict_set_interned(ctx->state, ctx->mod_state->field, Py_True); \
2380
+ }
2381
+
2382
+ MAKE_DATA_FLAG_HANDLER(handle_i_frames_only, str_is_i_frames_only)
2383
+ MAKE_DATA_FLAG_HANDLER(handle_independent_segments, str_is_independent_segments)
2384
+ MAKE_DATA_FLAG_HANDLER(handle_endlist, str_is_endlist)
2385
+ MAKE_DATA_FLAG_HANDLER(handle_images_only, str_is_images_only)
2386
+ MAKE_STATE_FLAG_HANDLER(handle_discontinuity, str_discontinuity)
2387
+ MAKE_STATE_FLAG_HANDLER(handle_cue_in, str_cue_in)
2388
+ MAKE_STATE_FLAG_HANDLER(handle_cue_span, str_cue_out)
2389
+ MAKE_STATE_FLAG_HANDLER(handle_gap, str_gap)
2390
+
2391
+ #undef MAKE_DATA_FLAG_HANDLER
2392
+ #undef MAKE_STATE_FLAG_HANDLER
2393
+
2394
+ /* Wrapper handlers for cue parsing */
2395
+ static int handle_cue_out(ParseContext *ctx, const char *line) {
2396
+ return parse_cueout(ctx->mod_state, line, ctx->state);
2397
+ }
2398
+ static int handle_cue_out_cont(ParseContext *ctx, const char *line) {
2399
+ return parse_cueout_cont(ctx->mod_state, line, ctx->state);
2400
+ }
2401
+
2402
+ /* Handler for #EXT-OATCLS-SCTE35 - uses interned strings */
2403
+ static int
2404
+ handle_oatcls_scte35(ParseContext *ctx, const char *line)
2405
+ {
2406
+ m3u8_state *ms = ctx->mod_state;
2407
+ const char *value = strchr(line, ':');
2408
+ if (value == NULL) return 0;
2409
+ value++;
2410
+
2411
+ PyObject *py_value = PyUnicode_FromString(value);
2412
+ if (py_value == NULL) return -1;
2413
+
2414
+ if (dict_set_interned(ctx->state, ms->str_current_cue_out_oatcls_scte35, py_value) < 0) {
2415
+ Py_DECREF(py_value);
2416
+ return -1;
2417
+ }
2418
+ PyObject *current = dict_get_interned(ctx->state, ms->str_current_cue_out_scte35);
2419
+ if (current == NULL) {
2420
+ if (dict_set_interned(ctx->state, ms->str_current_cue_out_scte35, py_value) < 0) {
2421
+ Py_DECREF(py_value);
2422
+ return -1;
2423
+ }
2424
+ }
2425
+ Py_DECREF(py_value);
2426
+ return 0;
2427
+ }
2428
+
2429
+ /* Handler for #EXT-X-ASSET - uses interned strings */
2430
+ static int
2431
+ handle_asset(ParseContext *ctx, const char *line)
2432
+ {
2433
+ PyObject *asset = parse_attribute_list(line, EXT_X_ASSET);
2434
+ if (asset == NULL) return -1;
2435
+ int rc = dict_set_interned(ctx->state, ctx->mod_state->str_asset_metadata, asset);
2436
+ Py_DECREF(asset);
2437
+ return rc < 0 ? -1 : 0;
2438
+ }
2439
+
2440
+ /* Handler for #EXT-X-MAP */
2441
+ static int
2442
+ handle_map(ParseContext *ctx, const char *line)
2443
+ {
2444
+ m3u8_state *ms = ctx->mod_state;
2445
+ PyObject *map_info = parse_typed_attribute_list(line, EXT_X_MAP,
2446
+ x_map_parsers, NUM_X_MAP_PARSERS);
2447
+ if (map_info == NULL) {
2448
+ return -1;
2449
+ }
2450
+ if (dict_set_interned(ctx->state, ms->str_current_segment_map, map_info) < 0) {
2451
+ Py_DECREF(map_info);
2452
+ return -1;
2453
+ }
2454
+ PyObject *segment_map = dict_get_interned(ctx->data, ms->str_segment_map);
2455
+ int rc = PyList_Append(segment_map, map_info);
2456
+ Py_DECREF(map_info);
2457
+ return rc;
2458
+ }
2459
+
2460
+ /*
2461
+ * Macro-generated typed attribute handlers.
2462
+ * These parse typed attributes and store the result in ctx->data.
2463
+ */
2464
+ #define MAKE_TYPED_ATTR_HANDLER(name, tag, parsers, num_parsers, field) \
2465
+ static int name(ParseContext *ctx, const char *line) { \
2466
+ PyObject *result = parse_typed_attribute_list(line, tag, parsers, num_parsers); \
2467
+ if (result == NULL) return -1; \
2468
+ int rc = dict_set_interned(ctx->data, ctx->mod_state->field, result); \
2469
+ Py_DECREF(result); \
2470
+ return rc < 0 ? -1 : 0; \
2471
+ }
2472
+
2473
+ MAKE_TYPED_ATTR_HANDLER(handle_start, EXT_X_START, start_parsers, NUM_START_PARSERS, str_start)
2474
+ MAKE_TYPED_ATTR_HANDLER(handle_server_control, EXT_X_SERVER_CONTROL, server_control_parsers, NUM_SERVER_CONTROL_PARSERS, str_server_control)
2475
+ MAKE_TYPED_ATTR_HANDLER(handle_part_inf, EXT_X_PART_INF, part_inf_parsers, NUM_PART_INF_PARSERS, str_part_inf)
2476
+
2477
+ /* Handler for #EXT-X-PART */
2478
+ static int handle_part(ParseContext *ctx, const char *line) {
2479
+ return parse_part(ctx->mod_state, line, ctx->state);
2480
+ }
2481
+
2482
+ MAKE_TYPED_ATTR_LIST_HANDLER(handle_rendition_report, EXT_X_RENDITION_REPORT, rendition_report_parsers, NUM_RENDITION_REPORT_PARSERS, str_rendition_reports)
2483
+
2484
+ MAKE_TYPED_ATTR_HANDLER(handle_skip, EXT_X_SKIP, skip_parsers, NUM_SKIP_PARSERS, str_skip)
2485
+
2486
+ MAKE_TYPED_ATTR_LIST_HANDLER(handle_session_data, EXT_X_SESSION_DATA, session_data_parsers, NUM_SESSION_DATA_PARSERS, str_session_data)
2487
+ MAKE_TYPED_ATTR_LIST_HANDLER(handle_tiles, EXT_X_TILES, tiles_parsers, NUM_TILES_PARSERS, str_tiles)
2488
+
2489
+ #undef MAKE_TYPED_ATTR_LIST_HANDLER
2490
+
2491
+ /* Handler for #EXT-X-SESSION-KEY */
2492
+ static int handle_session_key(ParseContext *ctx, const char *line) {
2493
+ PyObject *key = parse_attrs_unquoted(line, EXT_X_SESSION_KEY);
2494
+ if (!key) return -1;
2495
+ PyObject *session_keys = dict_get_interned(ctx->data, ctx->mod_state->str_session_keys);
2496
+ int rc = PyList_Append(session_keys, key);
2497
+ Py_DECREF(key);
2498
+ return rc;
2499
+ }
2500
+
2501
+ MAKE_TYPED_ATTR_HANDLER(handle_preload_hint, EXT_X_PRELOAD_HINT, preload_hint_parsers, NUM_PRELOAD_HINT_PARSERS, str_preload_hint)
2502
+
2503
+ /* Handler for #EXT-X-DATERANGE */
2504
+ static int
2505
+ handle_daterange(ParseContext *ctx, const char *line)
2506
+ {
2507
+ PyObject *daterange = parse_typed_attribute_list(line, EXT_X_DATERANGE,
2508
+ daterange_parsers, NUM_DATERANGE_PARSERS);
2509
+ if (daterange == NULL) {
2510
+ return -1;
2511
+ }
2512
+
2513
+ PyObject *dateranges = dict_get_interned(ctx->state, ctx->mod_state->str_dateranges);
2514
+ if (dateranges == NULL) {
2515
+ dateranges = PyList_New(0);
2516
+ if (dateranges == NULL) {
2517
+ Py_DECREF(daterange);
2518
+ return -1;
2519
+ }
2520
+ if (dict_set_interned(ctx->state, ctx->mod_state->str_dateranges, dateranges) < 0) {
2521
+ Py_DECREF(dateranges);
2522
+ Py_DECREF(daterange);
2523
+ return -1;
2524
+ }
2525
+ Py_DECREF(dateranges);
2526
+ dateranges = dict_get_interned(ctx->state, ctx->mod_state->str_dateranges);
2527
+ }
2528
+
2529
+ int rc = PyList_Append(dateranges, daterange);
2530
+ Py_DECREF(daterange);
2531
+ return rc;
2532
+ }
2533
+
2534
+ MAKE_TYPED_ATTR_HANDLER(handle_content_steering, EXT_X_CONTENT_STEERING, content_steering_parsers, NUM_CONTENT_STEERING_PARSERS, str_content_steering)
2535
+
2536
+ #undef MAKE_TYPED_ATTR_HANDLER
2537
+
2538
+ /* Handler for #EXT-X-BLACKOUT */
2539
+ static int
2540
+ handle_blackout(ParseContext *ctx, const char *line)
2541
+ {
2542
+ const char *colon = strchr(line, ':');
2543
+ if (colon != NULL && *(colon + 1) != '\0') {
2544
+ PyObject *blackout_data = PyUnicode_FromString(colon + 1);
2545
+ if (blackout_data == NULL) {
2546
+ return -1;
2547
+ }
2548
+ int rc = dict_set_interned(ctx->state, ctx->mod_state->str_blackout, blackout_data);
2549
+ Py_DECREF(blackout_data);
2550
+ return rc < 0 ? -1 : 0;
2551
+ }
2552
+ return dict_set_interned(ctx->state, ctx->mod_state->str_blackout, Py_True);
2553
+ }
2554
+
2555
+ /*
2556
+ * Tag dispatch table.
2557
+ *
2558
+ * This replaces the massive if/else strcmp chain with a data-driven approach.
2559
+ * Linear scan for ~40 tags is negligible compared to Python object creation.
2560
+ * The table is ordered roughly by frequency for marginally better cache behavior.
2561
+ *
2562
+ * Note: sizeof(TAG)-1 gives strlen at compile time (excluding null terminator).
2563
+ */
2564
+ static const TagDispatch TAG_DISPATCH[] = {
2565
+ /* High-frequency tags first */
2566
+ {EXTINF, sizeof(EXTINF)-1, handle_extinf},
2567
+ {EXT_X_KEY, sizeof(EXT_X_KEY)-1, handle_key},
2568
+ {EXT_X_BYTERANGE, sizeof(EXT_X_BYTERANGE)-1, handle_byterange},
2569
+ {EXT_X_PROGRAM_DATE_TIME, sizeof(EXT_X_PROGRAM_DATE_TIME)-1, handle_program_date_time},
2570
+ {EXT_X_DISCONTINUITY, sizeof(EXT_X_DISCONTINUITY)-1, handle_discontinuity},
2571
+ {EXT_X_MAP, sizeof(EXT_X_MAP)-1, handle_map},
2572
+ {EXT_X_PART, sizeof(EXT_X_PART)-1, handle_part},
2573
+ {EXT_X_BITRATE, sizeof(EXT_X_BITRATE)-1, handle_bitrate},
2574
+ {EXT_X_GAP, sizeof(EXT_X_GAP)-1, handle_gap},
2575
+ {EXT_X_DATERANGE, sizeof(EXT_X_DATERANGE)-1, handle_daterange},
2576
+ /* Variant playlist tags */
2577
+ {EXT_X_STREAM_INF, sizeof(EXT_X_STREAM_INF)-1, handle_stream_inf},
2578
+ {EXT_X_MEDIA, sizeof(EXT_X_MEDIA)-1, handle_media},
2579
+ {EXT_X_I_FRAME_STREAM_INF, sizeof(EXT_X_I_FRAME_STREAM_INF)-1, handle_i_frame_stream_inf},
2580
+ {EXT_X_IMAGE_STREAM_INF, sizeof(EXT_X_IMAGE_STREAM_INF)-1, handle_image_stream_inf},
2581
+ {EXT_X_SESSION_DATA, sizeof(EXT_X_SESSION_DATA)-1, handle_session_data},
2582
+ {EXT_X_SESSION_KEY, sizeof(EXT_X_SESSION_KEY)-1, handle_session_key},
2583
+ {EXT_X_CONTENT_STEERING, sizeof(EXT_X_CONTENT_STEERING)-1, handle_content_steering},
2584
+ /* Playlist metadata tags */
2585
+ {EXT_X_TARGETDURATION, sizeof(EXT_X_TARGETDURATION)-1, handle_targetduration},
2586
+ {EXT_X_MEDIA_SEQUENCE, sizeof(EXT_X_MEDIA_SEQUENCE)-1, handle_media_sequence},
2587
+ {EXT_X_DISCONTINUITY_SEQUENCE,sizeof(EXT_X_DISCONTINUITY_SEQUENCE)-1,handle_discontinuity_sequence},
2588
+ {EXT_X_PLAYLIST_TYPE, sizeof(EXT_X_PLAYLIST_TYPE)-1, handle_playlist_type},
2589
+ {EXT_X_VERSION, sizeof(EXT_X_VERSION)-1, handle_version},
2590
+ {EXT_X_ALLOW_CACHE, sizeof(EXT_X_ALLOW_CACHE)-1, handle_allow_cache},
2591
+ {EXT_X_ENDLIST, sizeof(EXT_X_ENDLIST)-1, handle_endlist},
2592
+ {EXT_I_FRAMES_ONLY, sizeof(EXT_I_FRAMES_ONLY)-1, handle_i_frames_only},
2593
+ {EXT_IS_INDEPENDENT_SEGMENTS, sizeof(EXT_IS_INDEPENDENT_SEGMENTS)-1, handle_independent_segments},
2594
+ {EXT_X_IMAGES_ONLY, sizeof(EXT_X_IMAGES_ONLY)-1, handle_images_only},
2595
+ /* Low-latency HLS tags */
2596
+ {EXT_X_SERVER_CONTROL, sizeof(EXT_X_SERVER_CONTROL)-1, handle_server_control},
2597
+ {EXT_X_PART_INF, sizeof(EXT_X_PART_INF)-1, handle_part_inf},
2598
+ {EXT_X_RENDITION_REPORT, sizeof(EXT_X_RENDITION_REPORT)-1, handle_rendition_report},
2599
+ {EXT_X_SKIP, sizeof(EXT_X_SKIP)-1, handle_skip},
2600
+ {EXT_X_PRELOAD_HINT, sizeof(EXT_X_PRELOAD_HINT)-1, handle_preload_hint},
2601
+ /* SCTE-35 / Ad insertion tags */
2602
+ {EXT_X_CUE_OUT_CONT, sizeof(EXT_X_CUE_OUT_CONT)-1, handle_cue_out_cont},
2603
+ {EXT_X_CUE_OUT, sizeof(EXT_X_CUE_OUT)-1, handle_cue_out},
2604
+ {EXT_X_CUE_IN, sizeof(EXT_X_CUE_IN)-1, handle_cue_in},
2605
+ {EXT_X_CUE_SPAN, sizeof(EXT_X_CUE_SPAN)-1, handle_cue_span},
2606
+ {EXT_OATCLS_SCTE35, sizeof(EXT_OATCLS_SCTE35)-1, handle_oatcls_scte35},
2607
+ {EXT_X_ASSET, sizeof(EXT_X_ASSET)-1, handle_asset},
2608
+ /* Miscellaneous tags */
2609
+ {EXT_X_START, sizeof(EXT_X_START)-1, handle_start},
2610
+ {EXT_X_TILES, sizeof(EXT_X_TILES)-1, handle_tiles},
2611
+ {EXT_X_BLACKOUT, sizeof(EXT_X_BLACKOUT)-1, handle_blackout},
2612
+ /* Sentinel */
2613
+ {NULL, 0, NULL}
2614
+ };
2615
+
2616
+ /*
2617
+ * Dispatch a tag to its handler using the dispatch table.
2618
+ *
2619
+ * Returns:
2620
+ * 1 if handler was found and executed successfully
2621
+ * 0 if no handler found (unknown tag)
2622
+ * -1 if handler found but returned error (exception set)
2623
+ */
2624
+ static int
2625
+ dispatch_tag(ParseContext *ctx, const char *line, size_t line_len)
2626
+ {
2627
+ /* Fast rejection: all M3U8 tags start with '#' */
2628
+ if (line_len < 4 || line[0] != '#') {
2629
+ return 0;
2630
+ }
2631
+
2632
+ for (const TagDispatch *d = TAG_DISPATCH; d->tag != NULL; d++) {
2633
+ /* Skip if line is shorter than tag */
2634
+ if (line_len < d->tag_len) continue;
2635
+
2636
+ if (strncmp(line, d->tag, d->tag_len) == 0) {
2637
+ /* Verify tag boundary: must end with ':' or be complete line */
2638
+ char next = line[d->tag_len];
2639
+ if (next == ':' || next == '\0') {
2640
+ if (d->handler(ctx, line) < 0) {
2641
+ return -1;
2642
+ }
2643
+ return 1; /* Handled */
2644
+ }
2645
+ }
2646
+ }
2647
+ return 0; /* Not found */
2648
+ }
2649
+
2650
+ /*
2651
+ * Main parse function.
2652
+ *
2653
+ * Parse M3U8 playlist content and return a dictionary with all data found.
2654
+ *
2655
+ * Args:
2656
+ * content: The M3U8 playlist content as a string.
2657
+ * strict: If True, raise exceptions for syntax errors (default: False).
2658
+ * custom_tags_parser: Optional callable for parsing custom tags.
2659
+ *
2660
+ * Returns:
2661
+ * A dictionary containing the parsed playlist data.
2662
+ */
2663
+ static PyObject *
2664
+ m3u8_parse(PyObject *module, PyObject *args, PyObject *kwargs)
2665
+ {
2666
+ const char *content;
2667
+ Py_ssize_t content_len; /* Get size directly - enables zero-copy parsing */
2668
+ int strict = 0;
2669
+ PyObject *custom_tags_parser = Py_None;
2670
+
2671
+ static char *kwlist[] = {"content", "strict", "custom_tags_parser", NULL};
2672
+
2673
+ /* Use s# to get pointer AND size directly from Python string object */
2674
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|pO", kwlist,
2675
+ &content, &content_len, &strict, &custom_tags_parser)) {
2676
+ return NULL;
2677
+ }
2678
+
2679
+ /*
2680
+ * Match parser.py's behavior: lines = content.strip().splitlines()
2681
+ *
2682
+ * The Python parser strips leading/trailing whitespace *before* splitting,
2683
+ * which affects strict-mode error line numbers when the input has leading
2684
+ * newlines (common with triple-quoted test fixtures).
2685
+ */
2686
+ const char *trimmed = content;
2687
+ const char *trimmed_end = content + content_len;
2688
+ while (trimmed < trimmed_end && ascii_isspace((unsigned char)*trimmed)) {
2689
+ trimmed++;
2690
+ }
2691
+ while (trimmed_end > trimmed && ascii_isspace((unsigned char)*(trimmed_end - 1))) {
2692
+ trimmed_end--;
2693
+ }
2694
+ Py_ssize_t trimmed_len = (Py_ssize_t)(trimmed_end - trimmed);
2695
+
2696
+ /* Get module state for cached objects */
2697
+ m3u8_state *mod_state = get_m3u8_state(module);
2698
+
2699
+ /* Check strict mode validation */
2700
+ if (strict) {
2701
+ /* Import and call version_matching.validate */
2702
+ PyObject *version_matching = PyImport_ImportModule("openm3u8.version_matching");
2703
+ if (version_matching == NULL) {
2704
+ return NULL;
2705
+ }
2706
+ PyObject *validate = PyObject_GetAttrString(version_matching, "validate");
2707
+ if (validate == NULL) {
2708
+ Py_DECREF(version_matching);
2709
+ return NULL;
2710
+ }
2711
+ /* Build list like parser.py: content.strip().splitlines() */
2712
+ PyObject *lines_list = build_stripped_splitlines(trimmed);
2713
+ if (lines_list == NULL) {
2714
+ Py_DECREF(validate);
2715
+ Py_DECREF(version_matching);
2716
+ return NULL;
2717
+ }
2718
+
2719
+ PyObject *errors = PyObject_CallFunctionObjArgs(validate, lines_list, NULL);
2720
+ Py_DECREF(lines_list);
2721
+ Py_DECREF(validate);
2722
+ Py_DECREF(version_matching);
2723
+
2724
+ if (errors == NULL) {
2725
+ return NULL;
2726
+ }
2727
+ if (PyList_Size(errors) > 0) {
2728
+ PyErr_SetObject(PyExc_Exception, errors);
2729
+ Py_DECREF(errors);
2730
+ return NULL;
2731
+ }
2732
+ Py_DECREF(errors);
2733
+ }
2734
+
2735
+ /* Initialize result data dict using interned strings */
2736
+ PyObject *data = init_parse_data(mod_state);
2737
+ if (data == NULL) {
2738
+ return NULL;
2739
+ }
2740
+
2741
+ /* Initialize parser state dict */
2742
+ PyObject *state = init_parse_state(mod_state);
2743
+ if (state == NULL) {
2744
+ Py_DECREF(data);
2745
+ return NULL;
2746
+ }
2747
+
2748
+ /*
2749
+ * Set up parse context with shadow state.
2750
+ * Shadow state avoids dict lookups for hot flags in the main loop.
2751
+ */
2752
+ ParseContext ctx = {
2753
+ .mod_state = mod_state,
2754
+ .data = data,
2755
+ .state = state,
2756
+ .strict = strict,
2757
+ .lineno = 0,
2758
+ .expect_segment = 0, /* Matches init_parse_state */
2759
+ .expect_playlist = 0, /* Matches init_parse_state */
2760
+ };
2761
+
2762
+ /*
2763
+ * Zero-copy line parsing: Walk the buffer with pointers.
2764
+ * We use a single reusable line buffer for the null-terminated stripped line.
2765
+ * This avoids copying the entire content upfront (the strtok_r approach).
2766
+ */
2767
+ const char *p = trimmed;
2768
+ const char *end = trimmed + trimmed_len;
2769
+
2770
+ /* Reusable line buffer - starts small, grows as needed */
2771
+ size_t line_buf_size = 256;
2772
+ char *line_buf = PyMem_Malloc(line_buf_size);
2773
+ if (line_buf == NULL) {
2774
+ Py_DECREF(data);
2775
+ Py_DECREF(state);
2776
+ return PyErr_NoMemory();
2777
+ }
2778
+
2779
+ while (p < end) {
2780
+ ctx.lineno++;
2781
+
2782
+ /* Find end of line using memchr (often hardware-optimized) */
2783
+ const char *line_start = p;
2784
+ const char *eol = p;
2785
+ while (eol < end && *eol != '\n' && *eol != '\r') {
2786
+ eol++;
2787
+ }
2788
+ Py_ssize_t line_len = eol - line_start;
2789
+
2790
+ /* Strip leading whitespace */
2791
+ while (line_len > 0 && ascii_isspace((unsigned char)*line_start)) {
2792
+ line_start++;
2793
+ line_len--;
2794
+ }
2795
+ /* Strip trailing whitespace */
2796
+ while (line_len > 0 && ascii_isspace((unsigned char)line_start[line_len - 1])) {
2797
+ line_len--;
2798
+ }
2799
+
2800
+ /* Advance p past the newline(s) for next iteration */
2801
+ if (eol < end) {
2802
+ if (*eol == '\r' && (eol + 1) < end && *(eol + 1) == '\n') {
2803
+ p = eol + 2; /* Skip \r\n */
2804
+ } else {
2805
+ p = eol + 1; /* Skip \n or \r */
2806
+ }
2807
+ } else {
2808
+ p = end;
2809
+ }
2810
+
2811
+ /* Skip empty lines */
2812
+ if (line_len == 0) {
2813
+ continue;
2814
+ }
2815
+
2816
+ /* Grow line buffer if needed */
2817
+ if ((size_t)line_len + 1 > line_buf_size) {
2818
+ line_buf_size = (size_t)line_len + 1;
2819
+ char *new_buf = PyMem_Realloc(line_buf, line_buf_size);
2820
+ if (new_buf == NULL) {
2821
+ PyMem_Free(line_buf);
2822
+ Py_DECREF(data);
2823
+ Py_DECREF(state);
2824
+ return PyErr_NoMemory();
2825
+ }
2826
+ line_buf = new_buf;
2827
+ }
2828
+
2829
+ /* Copy stripped line to null-terminated buffer */
2830
+ memcpy(line_buf, line_start, line_len);
2831
+ line_buf[line_len] = '\0';
2832
+ char *stripped = line_buf;
2833
+
2834
+ /* Call custom tags parser if provided */
2835
+ if (stripped[0] == '#' && custom_tags_parser != Py_None && PyCallable_Check(custom_tags_parser)) {
2836
+ /* Sync shadow state to dict before callback (so it sees current state) */
2837
+ if (sync_shadow_to_dict(&ctx) < 0) {
2838
+ PyMem_Free(line_buf);
2839
+ Py_DECREF(data);
2840
+ Py_DECREF(state);
2841
+ return NULL;
2842
+ }
2843
+ PyObject *py_line = PyUnicode_FromString(stripped);
2844
+ PyObject *py_lineno = PyLong_FromLong(ctx.lineno);
2845
+ if (py_line == NULL || py_lineno == NULL) {
2846
+ Py_XDECREF(py_line);
2847
+ Py_XDECREF(py_lineno);
2848
+ PyMem_Free(line_buf);
2849
+ Py_DECREF(data);
2850
+ Py_DECREF(state);
2851
+ return NULL;
2852
+ }
2853
+ PyObject *call_args = PyTuple_Pack(4, py_line, py_lineno, data, state);
2854
+ Py_DECREF(py_line);
2855
+ Py_DECREF(py_lineno);
2856
+ if (call_args == NULL) {
2857
+ PyMem_Free(line_buf);
2858
+ Py_DECREF(data);
2859
+ Py_DECREF(state);
2860
+ return NULL;
2861
+ }
2862
+ PyObject *result = PyObject_Call(custom_tags_parser, call_args, NULL);
2863
+ Py_DECREF(call_args);
2864
+ if (!result) {
2865
+ PyMem_Free(line_buf);
2866
+ Py_DECREF(data);
2867
+ Py_DECREF(state);
2868
+ return NULL;
2869
+ }
2870
+ /* Sync shadow state from dict (callback may have modified it) */
2871
+ sync_shadow_from_dict(&ctx);
2872
+ int truth = PyObject_IsTrue(result);
2873
+ Py_DECREF(result);
2874
+ if (truth < 0) {
2875
+ PyMem_Free(line_buf);
2876
+ Py_DECREF(data);
2877
+ Py_DECREF(state);
2878
+ return NULL;
2879
+ }
2880
+ if (truth) {
2881
+ /* p has already been advanced to the next line at the top of the loop */
2882
+ continue;
2883
+ }
2884
+ }
2885
+
2886
+ if (stripped[0] == '#') {
2887
+ /*
2888
+ * Tag dispatch using data-driven table lookup.
2889
+ * This replaces ~400 lines of if/else strcmp chain with a clean loop.
2890
+ * See TAG_DISPATCH table for the tag-to-handler mappings.
2891
+ */
2892
+
2893
+ /* Handle #EXTM3U - just ignore it */
2894
+ if (strncmp(stripped, EXT_M3U, sizeof(EXT_M3U)-1) == 0) {
2895
+ continue;
2896
+ }
2897
+
2898
+ /* Dispatch to handler via table lookup */
2899
+ int dispatch_result = dispatch_tag(&ctx, stripped, line_len);
2900
+ if (dispatch_result < 0) {
2901
+ /* Handler returned error */
2902
+ PyMem_Free(line_buf);
2903
+ Py_DECREF(data);
2904
+ Py_DECREF(state);
2905
+ return NULL;
2906
+ }
2907
+ if (dispatch_result == 0) {
2908
+ /* Unknown tag - error in strict mode */
2909
+ if (ctx.strict) {
2910
+ raise_parse_error(mod_state, ctx.lineno, stripped);
2911
+ PyMem_Free(line_buf);
2912
+ Py_DECREF(data);
2913
+ Py_DECREF(state);
2914
+ return NULL;
2915
+ }
2916
+ }
2917
+ } else {
2918
+ /* Non-comment line - segment or playlist URI */
2919
+ /* Use shadow state for hot path checks (no dict lookups) */
2920
+ if (ctx.expect_segment) {
2921
+ if (parse_ts_chunk(mod_state, stripped, data, state) < 0) {
2922
+ PyMem_Free(line_buf);
2923
+ Py_DECREF(data);
2924
+ Py_DECREF(state);
2925
+ return NULL;
2926
+ }
2927
+ ctx.expect_segment = 0; /* parse_ts_chunk clears this */
2928
+ } else if (ctx.expect_playlist) {
2929
+ if (parse_variant_playlist(mod_state, stripped, data, state) < 0) {
2930
+ PyMem_Free(line_buf);
2931
+ Py_DECREF(data);
2932
+ Py_DECREF(state);
2933
+ return NULL;
2934
+ }
2935
+ ctx.expect_playlist = 0; /* parse_variant_playlist clears this */
2936
+ } else if (strict) {
2937
+ raise_parse_error(mod_state, ctx.lineno, stripped);
2938
+ PyMem_Free(line_buf);
2939
+ Py_DECREF(data);
2940
+ Py_DECREF(state);
2941
+ return NULL;
2942
+ }
2943
+ }
2944
+ /* Loop continues with pointer already advanced */
2945
+ }
2946
+
2947
+ PyMem_Free(line_buf);
2948
+
2949
+ /* Handle remaining partial segment - use interned strings */
2950
+ PyObject *segment = dict_get_interned(state, mod_state->str_segment);
2951
+ if (segment) {
2952
+ PyObject *segments = dict_get_interned(data, mod_state->str_segments);
2953
+ if (segments && PyList_Append(segments, segment) < 0) {
2954
+ Py_DECREF(state);
2955
+ Py_DECREF(data);
2956
+ return NULL;
2957
+ }
2958
+ }
2959
+
2960
+ Py_DECREF(state);
2961
+ return data;
2962
+ }
2963
+
2964
+ /* Module methods */
2965
+ static PyMethodDef m3u8_parser_methods[] = {
2966
+ {"parse", (PyCFunction)m3u8_parse, METH_VARARGS | METH_KEYWORDS,
2967
+ PyDoc_STR(
2968
+ "parse(content, strict=False, custom_tags_parser=None)\n"
2969
+ "--\n\n"
2970
+ "Parse M3U8 playlist content and return a dictionary with all data found.\n\n"
2971
+ "This is an optimized C implementation that produces output identical to\n"
2972
+ "the pure Python parser in openm3u8.parser.parse().\n\n"
2973
+ "Parameters\n"
2974
+ "----------\n"
2975
+ "content : str\n"
2976
+ " The M3U8 playlist content as a string.\n"
2977
+ "strict : bool, optional\n"
2978
+ " If True, raise exceptions for syntax errors. Default is False.\n"
2979
+ "custom_tags_parser : callable, optional\n"
2980
+ " A function that receives (line, lineno, data, state) for custom tag\n"
2981
+ " handling. Return True to skip default parsing for that line.\n\n"
2982
+ "Returns\n"
2983
+ "-------\n"
2984
+ "dict\n"
2985
+ " A dictionary containing the parsed playlist data with keys including:\n"
2986
+ " 'segments', 'playlists', 'media', 'keys', 'is_variant', etc.\n\n"
2987
+ "Raises\n"
2988
+ "------\n"
2989
+ "ParseError\n"
2990
+ " If strict=True and a syntax error is encountered.\n"
2991
+ "Exception\n"
2992
+ " If strict=True and version validation fails.\n\n"
2993
+ "Examples\n"
2994
+ "--------\n"
2995
+ ">>> from openm3u8._m3u8_parser import parse\n"
2996
+ ">>> result = parse('#EXTM3U\\n#EXTINF:10,\\nfoo.ts')\n"
2997
+ ">>> len(result['segments'])\n"
2998
+ "1\n"
2999
+ )},
3000
+ {NULL, NULL, 0, NULL}
3001
+ };
3002
+
3003
+ /*
3004
+ * Module traverse function for GC - uses X-macro expansion.
3005
+ */
3006
+ static int
3007
+ m3u8_parser_traverse(PyObject *module, visitproc visit, void *arg)
3008
+ {
3009
+ m3u8_state *state = get_m3u8_state(module);
3010
+ Py_VISIT(state->ParseError);
3011
+ Py_VISIT(state->datetime_cls);
3012
+ Py_VISIT(state->timedelta_cls);
3013
+ Py_VISIT(state->fromisoformat_meth);
3014
+ #define VISIT_INTERNED(name, str) Py_VISIT(state->name);
3015
+ INTERNED_STRINGS(VISIT_INTERNED)
3016
+ #undef VISIT_INTERNED
3017
+ return 0;
3018
+ }
3019
+
3020
+ /*
3021
+ * Module clear function for GC - uses X-macro expansion.
3022
+ */
3023
+ static int
3024
+ m3u8_parser_clear(PyObject *module)
3025
+ {
3026
+ m3u8_state *state = get_m3u8_state(module);
3027
+ Py_CLEAR(state->ParseError);
3028
+ Py_CLEAR(state->datetime_cls);
3029
+ Py_CLEAR(state->timedelta_cls);
3030
+ Py_CLEAR(state->fromisoformat_meth);
3031
+ #define CLEAR_INTERNED(name, str) Py_CLEAR(state->name);
3032
+ INTERNED_STRINGS(CLEAR_INTERNED)
3033
+ #undef CLEAR_INTERNED
3034
+ return 0;
3035
+ }
3036
+
3037
+ /*
3038
+ * Module deallocation function.
3039
+ */
3040
+ static void
3041
+ m3u8_parser_free(void *module)
3042
+ {
3043
+ m3u8_parser_clear((PyObject *)module);
3044
+ }
3045
+
3046
+ /* Module definition */
3047
+ static struct PyModuleDef m3u8_parser_module = {
3048
+ PyModuleDef_HEAD_INIT,
3049
+ .m_name = "_m3u8_parser",
3050
+ .m_doc = "C extension for fast M3U8 playlist parsing.",
3051
+ .m_size = sizeof(m3u8_state),
3052
+ .m_methods = m3u8_parser_methods,
3053
+ .m_traverse = m3u8_parser_traverse,
3054
+ .m_clear = m3u8_parser_clear,
3055
+ .m_free = m3u8_parser_free,
3056
+ };
3057
+
3058
+ /*
3059
+ * Module initialization.
3060
+ *
3061
+ * Creates the module, initializes module state, and sets up cached objects.
3062
+ */
3063
+ PyMODINIT_FUNC
3064
+ PyInit__m3u8_parser(void)
3065
+ {
3066
+ PyObject *m = PyModule_Create(&m3u8_parser_module);
3067
+ if (m == NULL) {
3068
+ return NULL;
3069
+ }
3070
+
3071
+ m3u8_state *state = get_m3u8_state(m);
3072
+
3073
+ /* Initialize module state to NULL for safe cleanup on error */
3074
+ state->ParseError = NULL;
3075
+ state->datetime_cls = NULL;
3076
+ state->timedelta_cls = NULL;
3077
+ state->fromisoformat_meth = NULL;
3078
+ #define NULL_INTERNED(name, str) state->name = NULL;
3079
+ INTERNED_STRINGS(NULL_INTERNED)
3080
+ #undef NULL_INTERNED
3081
+
3082
+ /* Import ParseError from openm3u8.parser to use the same exception class */
3083
+ PyObject *parser_module = PyImport_ImportModule("openm3u8.parser");
3084
+ if (parser_module != NULL) {
3085
+ state->ParseError = PyObject_GetAttrString(parser_module, "ParseError");
3086
+ Py_DECREF(parser_module);
3087
+ }
3088
+
3089
+ /* Fallback: create our own ParseError if import fails */
3090
+ if (state->ParseError == NULL) {
3091
+ PyErr_Clear();
3092
+ state->ParseError = PyErr_NewException(
3093
+ "openm3u8._m3u8_parser.ParseError", PyExc_Exception, NULL);
3094
+ if (state->ParseError == NULL) {
3095
+ goto error;
3096
+ }
3097
+ }
3098
+
3099
+ /* Add ParseError to module (PyModule_AddObject steals a reference on success) */
3100
+ Py_INCREF(state->ParseError);
3101
+ if (PyModule_AddObject(m, "ParseError", state->ParseError) < 0) {
3102
+ Py_DECREF(state->ParseError);
3103
+ goto error;
3104
+ }
3105
+
3106
+ /* Initialize datetime cache */
3107
+ if (init_datetime_cache(state) < 0) {
3108
+ goto error;
3109
+ }
3110
+
3111
+ /* Initialize interned strings for common dict keys */
3112
+ if (init_interned_strings(state) < 0) {
3113
+ goto error;
3114
+ }
3115
+
3116
+ return m;
3117
+
3118
+ error:
3119
+ Py_DECREF(m);
3120
+ return NULL;
3121
+ }
3122
+