hctef 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ name: "Continuous integration"
2
+
3
+ concurrency:
4
+ group: ${{ github.ref }}
5
+ cancel-in-progress: false
6
+
7
+ on:
8
+ push:
9
+ branches:
10
+ - main
11
+ pull_request:
12
+
13
+ jobs:
14
+ ci:
15
+ name: Continuous integration
16
+ runs-on: ubuntu-latest
17
+ strategy:
18
+ matrix:
19
+ python-version:
20
+ - "3.12"
21
+ - "3.13"
22
+ - "3.14"
23
+ steps:
24
+ - uses: actions/checkout@v4
25
+ - uses: astral-sh/setup-uv@v6
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+ - name: Sync
29
+ run: |
30
+ uv sync \
31
+ --locked \
32
+ --all-extras \
33
+ --no-editable
34
+ - name: Pre-Commit Hooks
35
+ run: uv run pre-commit run --all-files
36
+ - name: Test
37
+ run: uv run pytest
38
+ - name: "Upload coverage to Codecov"
39
+ uses: codecov/codecov-action@v4
40
+ env:
41
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
42
+ with:
43
+ fail_ci_if_error: false
44
+ verbose: true
@@ -0,0 +1,47 @@
1
+ name: Build and release
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+ release:
11
+ types:
12
+ - published
13
+
14
+ jobs:
15
+ build-package:
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+ - uses: astral-sh/setup-uv@v6
20
+ - name: Build
21
+ run: uv build
22
+ - name: Upload Artifact
23
+ uses: actions/upload-artifact@v4
24
+ if: startsWith(github.ref, 'refs/tags')
25
+ with:
26
+ name: dist-{github.ref}
27
+ path: dist/
28
+ overwrite: true
29
+ if-no-files-found: error
30
+
31
+ release-package:
32
+ if: startsWith(github.ref, 'refs/tags')
33
+ needs: build-package
34
+ runs-on: ubuntu-latest
35
+ environment:
36
+ name: pypi
37
+ url: https://pypi.org/p/hctef
38
+ permissions:
39
+ id-token: write
40
+ steps:
41
+ - name: Download a single artifact
42
+ uses: actions/download-artifact@v5
43
+ with:
44
+ name: dist-{github.ref}
45
+ path: dist/
46
+ - name: Upload release
47
+ uses: pypa/gh-action-pypi-publish@release/v1
hctef-0.1.0/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ .coverage
13
+ __version__.py
@@ -0,0 +1,51 @@
1
+ exclude: "^tests/fixtures/"
2
+ repos:
3
+ - repo: local
4
+ hooks:
5
+ - id: ruff_check
6
+ name: ruff check
7
+ entry: ruff check --force-exclude
8
+ language: python
9
+ 'types_or': [python, pyi]
10
+ args: [--fix, --exit-non-zero-on-fix]
11
+ require_serial: true
12
+ - id: ruff_format
13
+ name: ruff format
14
+ entry: ruff format --force-exclude
15
+ language: python
16
+ 'types_or': [python, pyi]
17
+ args: []
18
+ require_serial: true
19
+ - id: check-added-large-files
20
+ name: Check for added large files
21
+ entry: check-added-large-files
22
+ language: system
23
+ - id: check-toml
24
+ name: Check Toml
25
+ entry: check-toml
26
+ language: system
27
+ types: [toml]
28
+ - id: check-yaml
29
+ name: Check Yaml
30
+ entry: check-yaml
31
+ language: system
32
+ types: [yaml]
33
+ - id: end-of-file-fixer
34
+ name: Fix End of Files
35
+ entry: end-of-file-fixer
36
+ language: system
37
+ types: [text]
38
+ stages: [pre-commit, pre-push, manual]
39
+ - id: trailing-whitespace
40
+ name: Trim Trailing Whitespace
41
+ entry: trailing-whitespace-fixer
42
+ language: system
43
+ types: [text]
44
+ stages: [pre-commit, pre-push, manual]
45
+ - id: mypy
46
+ name: mypy
47
+ entry: mypy
48
+ language: python
49
+ 'types_or': [python, pyi]
50
+ args: []
51
+ require_serial: true
hctef-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,236 @@
1
+ Metadata-Version: 2.4
2
+ Name: hctef
3
+ Version: 0.1.0
4
+ Summary: Helper classes to read files over HTTP using Range requests, with caching
5
+ Project-URL: Repository, https://github.com/jkeifer/hctef
6
+ Author-email: Jarrett Keifer <jkeifer0@gmail.com>
7
+ License: Apache License 2.0
8
+ Requires-Python: >=3.12
9
+ Provides-Extra: async
10
+ Requires-Dist: aiohttp>=3.13.0; extra == 'async'
11
+ Description-Content-Type: text/markdown
12
+
13
+ [![Tests](https://github.com/jkeifer/hctef/actions/workflows/ci.yml/badge.svg)](https://github.com/jkeifer/hctef/actions/workflows/ci.yml)
14
+ [![PyPI version](https://badge.fury.io/py/hctef.svg)](https://badge.fury.io/py/hctef)
15
+
16
+ # hctef
17
+
18
+ Python library with helper classes to read files over HTTP using Range
19
+ requests, with caching.
20
+
21
+ ## Overview
22
+
23
+ `hctef` provides a file-like interface for reading files over HTTP/HTTPS, using
24
+ HTTP Range requests to fetch only the data you need. It includes intelligent
25
+ caching to minimize network requests and supports both synchronous and
26
+ asynchronous operations.
27
+
28
+ ## Features
29
+
30
+ - **File-like API**: Works like a regular Python file object with `read()`,
31
+ `seek()`, and `tell()` methods
32
+ - **Efficient Range Requests**: Fetches only the data you need using HTTP Range
33
+ headers
34
+ - **Intelligent Caching**: Uses an interval tree to track cached byte ranges
35
+ and minimize redundant requests
36
+ - **Prefetching**: Optionally prefetch data from the start or end of the file
37
+ - **Sync and Async**: Both synchronous and asynchronous implementations
38
+ available
39
+ - **Context Manager Support**: Use with `with` statements for automatic cleanup
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ pip install hctef
45
+ ```
46
+
47
+ To include async support:
48
+
49
+ ```bash
50
+ pip install hctef[async]
51
+ ```
52
+
53
+ ## Quick Start
54
+
55
+ ### Synchronous Usage
56
+
57
+ ```python
58
+ from hctef import HttpFile
59
+
60
+ url = "https://example.com/large-file.bin"
61
+
62
+ with HttpFile(url) as f:
63
+ # Read first 100 bytes
64
+ data = f.read(100)
65
+
66
+ # Seek to a specific position
67
+ f.seek(1000)
68
+
69
+ # Read from current position
70
+ more_data = f.read(50)
71
+
72
+ # Get current position
73
+ position = f.tell()
74
+
75
+ # Seek relative to end of file
76
+ f.seek(-100, 2)
77
+ ```
78
+
79
+ ### Asynchronous Usage
80
+
81
+ The async implementation supports independent cursors for concurrent reads:
82
+
83
+ ```python
84
+ import asyncio
85
+ from hctef.aio import AsyncHttpFile
86
+
87
+ url = "https://example.com/large-file.bin"
88
+
89
+ async with AsyncHttpFile(url) as f:
90
+ # Read first 100 bytes
91
+ data = await f.read(100)
92
+
93
+ # Seek to a specific position (synchronous - no I/O)
94
+ f.seek(1000)
95
+
96
+ # Read from current position
97
+ more_data = await f.read(50)
98
+ ```
99
+
100
+ #### Parallel Reads with Multiple Cursors
101
+
102
+ Create independent cursors to read from different positions concurrently:
103
+
104
+ ```python
105
+ import asyncio
106
+ from hctef.aio import AsyncHttpFile
107
+
108
+ url = "https://example.com/large-file.bin"
109
+
110
+ async with AsyncHttpFile(url) as f:
111
+ # Create independent cursors for parallel reading
112
+ cursor1 = f.clone()
113
+ cursor2 = f.clone()
114
+
115
+ # Position each cursor at different locations
116
+ f.seek(0)
117
+ cursor1.seek(1000)
118
+ cursor2.seek(2000)
119
+
120
+ # Read from all three positions in parallel
121
+ # All cursors share the same cache and HTTP session
122
+ results = await asyncio.gather(
123
+ f.read(100), # Read bytes 0-100
124
+ cursor1.read(100), # Read bytes 1000-1100
125
+ cursor2.read(100), # Read bytes 2000-2100
126
+ )
127
+
128
+ # Each cursor maintains independent position
129
+ print(f.tell()) # 100
130
+ print(cursor1.tell()) # 1100
131
+ print(cursor2.tell()) # 2100
132
+ ```
133
+
134
+ Cursors are lightweight and share:
135
+
136
+ - HTTP session (connection pooling)
137
+ - Byte range cache (deduplication of overlapping requests)
138
+ - File metadata
139
+
140
+ ## Configuration Options
141
+
142
+ Both `HttpFile` and `AsyncHttpFile` accept the following parameters:
143
+
144
+ ```python
145
+ HttpFile(
146
+ url,
147
+ minimum_range_request_bytes=8192, # Minimum bytes per request (default: 8KB)
148
+ prefetch_bytes=1048576, # Bytes to prefetch on open (default: 1MB)
149
+ prefetch_direction='END' # 'START' or 'END' (default: 'END')
150
+ )
151
+ ```
152
+
153
+ - **`minimum_range_request_bytes`**: The minimum number of bytes to request in
154
+ a single HTTP Range request (except when filling small cache gaps)
155
+ - **`prefetch_bytes`**: How many bytes to fetch immediately when opening the
156
+ file. Set to 0 to disable prefetching
157
+ - **`prefetch_direction`**: Whether to prefetch from the start (`'START'`) or
158
+ end (`'END'`) of the file
159
+
160
+ ## Requirements
161
+
162
+ - Python 3.12 or higher
163
+ - HTTP server must support Range requests
164
+ - For async: `aiohttp>=3.13.0`
165
+
166
+ ## How It Works
167
+
168
+ When you open an HTTP file, `hctef`:
169
+
170
+ 1. Sends an initial Range request to determine the file size and verify Range
171
+ support
172
+ 1. Optionally prefetches data from the start or end of the file
173
+ 1. Maintains an in-memory cache of fetched byte ranges (not suitable for
174
+ downloading complete large files)
175
+ 1. On `read()`, checks the cache first and only fetches missing data from the
176
+ server
177
+ 1. Combines multiple small requests into larger ones based on
178
+ `minimum_range_request_bytes`
179
+
180
+ This approach minimizes HTTP requests while providing efficient random access
181
+ to remote files.
182
+
183
+ ## Error Handling
184
+
185
+ `hctef` defines custom exceptions:
186
+
187
+ - `HctefError`: Base exception class
188
+ - `HctefNetworkError`: Raised for network-related errors (inherits from
189
+ `IOError`)
190
+ - `HctefUrlError`: Raised for invalid URLs (inherits from `ValueError`)
191
+
192
+ ```python
193
+ from hctef import HttpFile
194
+ from hctef.exceptions import HctefNetworkError, HctefUrlError
195
+
196
+ try:
197
+ with HttpFile("https://example.com/file.bin") as f:
198
+ data = f.read(100)
199
+ except HctefNetworkError as e:
200
+ print(f"Network error: {e}")
201
+ except HctefUrlError as e:
202
+ print(f"Invalid URL: {e}")
203
+ ```
204
+
205
+ ## Development
206
+
207
+ To set up for development:
208
+
209
+ ```bash
210
+ # Clone the repository
211
+ git clone https://github.com/jkeifer/hctef
212
+ cd hctef
213
+
214
+ # Install dependencies
215
+ uv sync --all-extras --dev
216
+
217
+ # Setup pre-commit
218
+ pre-commit install
219
+
220
+ # Run tests
221
+ pytest
222
+
223
+ # Run all checks with pre-commit
224
+ pre-commit run --all-files
225
+ ```
226
+
227
+ ## Future Ideas
228
+
229
+ - Consolidate sync/async implementations
230
+ - Allow uncached "cursor" for reading a large file segment
231
+ - Cursors with separate caches (to allow clearing memory when done)
232
+ - would allow cursor-based access with non-async implementation
233
+
234
+ ## License
235
+
236
+ Apache License 2.0
hctef-0.1.0/README.md ADDED
@@ -0,0 +1,224 @@
1
+ [![Tests](https://github.com/jkeifer/hctef/actions/workflows/ci.yml/badge.svg)](https://github.com/jkeifer/hctef/actions/workflows/ci.yml)
2
+ [![PyPI version](https://badge.fury.io/py/hctef.svg)](https://badge.fury.io/py/hctef)
3
+
4
+ # hctef
5
+
6
+ Python library with helper classes to read files over HTTP using Range
7
+ requests, with caching.
8
+
9
+ ## Overview
10
+
11
+ `hctef` provides a file-like interface for reading files over HTTP/HTTPS, using
12
+ HTTP Range requests to fetch only the data you need. It includes intelligent
13
+ caching to minimize network requests and supports both synchronous and
14
+ asynchronous operations.
15
+
16
+ ## Features
17
+
18
+ - **File-like API**: Works like a regular Python file object with `read()`,
19
+ `seek()`, and `tell()` methods
20
+ - **Efficient Range Requests**: Fetches only the data you need using HTTP Range
21
+ headers
22
+ - **Intelligent Caching**: Uses an interval tree to track cached byte ranges
23
+ and minimize redundant requests
24
+ - **Prefetching**: Optionally prefetch data from the start or end of the file
25
+ - **Sync and Async**: Both synchronous and asynchronous implementations
26
+ available
27
+ - **Context Manager Support**: Use with `with` statements for automatic cleanup
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install hctef
33
+ ```
34
+
35
+ To include async support:
36
+
37
+ ```bash
38
+ pip install hctef[async]
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ ### Synchronous Usage
44
+
45
+ ```python
46
+ from hctef import HttpFile
47
+
48
+ url = "https://example.com/large-file.bin"
49
+
50
+ with HttpFile(url) as f:
51
+ # Read first 100 bytes
52
+ data = f.read(100)
53
+
54
+ # Seek to a specific position
55
+ f.seek(1000)
56
+
57
+ # Read from current position
58
+ more_data = f.read(50)
59
+
60
+ # Get current position
61
+ position = f.tell()
62
+
63
+ # Seek relative to end of file
64
+ f.seek(-100, 2)
65
+ ```
66
+
67
+ ### Asynchronous Usage
68
+
69
+ The async implementation supports independent cursors for concurrent reads:
70
+
71
+ ```python
72
+ import asyncio
73
+ from hctef.aio import AsyncHttpFile
74
+
75
+ url = "https://example.com/large-file.bin"
76
+
77
+ async with AsyncHttpFile(url) as f:
78
+ # Read first 100 bytes
79
+ data = await f.read(100)
80
+
81
+ # Seek to a specific position (synchronous - no I/O)
82
+ f.seek(1000)
83
+
84
+ # Read from current position
85
+ more_data = await f.read(50)
86
+ ```
87
+
88
+ #### Parallel Reads with Multiple Cursors
89
+
90
+ Create independent cursors to read from different positions concurrently:
91
+
92
+ ```python
93
+ import asyncio
94
+ from hctef.aio import AsyncHttpFile
95
+
96
+ url = "https://example.com/large-file.bin"
97
+
98
+ async with AsyncHttpFile(url) as f:
99
+ # Create independent cursors for parallel reading
100
+ cursor1 = f.clone()
101
+ cursor2 = f.clone()
102
+
103
+ # Position each cursor at different locations
104
+ f.seek(0)
105
+ cursor1.seek(1000)
106
+ cursor2.seek(2000)
107
+
108
+ # Read from all three positions in parallel
109
+ # All cursors share the same cache and HTTP session
110
+ results = await asyncio.gather(
111
+ f.read(100), # Read bytes 0-100
112
+ cursor1.read(100), # Read bytes 1000-1100
113
+ cursor2.read(100), # Read bytes 2000-2100
114
+ )
115
+
116
+ # Each cursor maintains independent position
117
+ print(f.tell()) # 100
118
+ print(cursor1.tell()) # 1100
119
+ print(cursor2.tell()) # 2100
120
+ ```
121
+
122
+ Cursors are lightweight and share:
123
+
124
+ - HTTP session (connection pooling)
125
+ - Byte range cache (deduplication of overlapping requests)
126
+ - File metadata
127
+
128
+ ## Configuration Options
129
+
130
+ Both `HttpFile` and `AsyncHttpFile` accept the following parameters:
131
+
132
+ ```python
133
+ HttpFile(
134
+ url,
135
+ minimum_range_request_bytes=8192, # Minimum bytes per request (default: 8KB)
136
+ prefetch_bytes=1048576, # Bytes to prefetch on open (default: 1MB)
137
+ prefetch_direction='END' # 'START' or 'END' (default: 'END')
138
+ )
139
+ ```
140
+
141
+ - **`minimum_range_request_bytes`**: The minimum number of bytes to request in
142
+ a single HTTP Range request (except when filling small cache gaps)
143
+ - **`prefetch_bytes`**: How many bytes to fetch immediately when opening the
144
+ file. Set to 0 to disable prefetching
145
+ - **`prefetch_direction`**: Whether to prefetch from the start (`'START'`) or
146
+ end (`'END'`) of the file
147
+
148
+ ## Requirements
149
+
150
+ - Python 3.12 or higher
151
+ - HTTP server must support Range requests
152
+ - For async: `aiohttp>=3.13.0`
153
+
154
+ ## How It Works
155
+
156
+ When you open an HTTP file, `hctef`:
157
+
158
+ 1. Sends an initial Range request to determine the file size and verify Range
159
+ support
160
+ 1. Optionally prefetches data from the start or end of the file
161
+ 1. Maintains an in-memory cache of fetched byte ranges (not suitable for
162
+ downloading complete large files)
163
+ 1. On `read()`, checks the cache first and only fetches missing data from the
164
+ server
165
+ 1. Combines multiple small requests into larger ones based on
166
+ `minimum_range_request_bytes`
167
+
168
+ This approach minimizes HTTP requests while providing efficient random access
169
+ to remote files.
170
+
171
+ ## Error Handling
172
+
173
+ `hctef` defines custom exceptions:
174
+
175
+ - `HctefError`: Base exception class
176
+ - `HctefNetworkError`: Raised for network-related errors (inherits from
177
+ `IOError`)
178
+ - `HctefUrlError`: Raised for invalid URLs (inherits from `ValueError`)
179
+
180
+ ```python
181
+ from hctef import HttpFile
182
+ from hctef.exceptions import HctefNetworkError, HctefUrlError
183
+
184
+ try:
185
+ with HttpFile("https://example.com/file.bin") as f:
186
+ data = f.read(100)
187
+ except HctefNetworkError as e:
188
+ print(f"Network error: {e}")
189
+ except HctefUrlError as e:
190
+ print(f"Invalid URL: {e}")
191
+ ```
192
+
193
+ ## Development
194
+
195
+ To set up for development:
196
+
197
+ ```bash
198
+ # Clone the repository
199
+ git clone https://github.com/jkeifer/hctef
200
+ cd hctef
201
+
202
+ # Install dependencies
203
+ uv sync --all-extras --dev
204
+
205
+ # Setup pre-commit
206
+ pre-commit install
207
+
208
+ # Run tests
209
+ pytest
210
+
211
+ # Run all checks with pre-commit
212
+ pre-commit run --all-files
213
+ ```
214
+
215
+ ## Future Ideas
216
+
217
+ - Consolidate sync/async implementations
218
+ - Allow uncached "cursor" for reading a large file segment
219
+ - Cursors with separate caches (to allow clearing memory when done)
220
+ - would allow cursor-based access with non-async implementation
221
+
222
+ ## License
223
+
224
+ Apache License 2.0
@@ -0,0 +1,99 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "hctef"
7
+ description = "Helper classes to read files over HTTP using Range requests, with caching"
8
+ readme = "README.md"
9
+ authors = [
10
+ { name = "Jarrett Keifer", email = "jkeifer0@gmail.com" }
11
+ ]
12
+ requires-python = ">=3.12"
13
+ license = {text = "Apache License 2.0"}
14
+ dependencies = []
15
+ dynamic = ["version"]
16
+
17
+ [project.urls]
18
+ Repository = 'https://github.com/jkeifer/hctef'
19
+
20
+ [project.optional-dependencies]
21
+ async = [
22
+ "aiohttp>=3.13.0",
23
+ ]
24
+
25
+ [dependency-groups]
26
+ dev = [
27
+ "mypy>=1.15.0",
28
+ "pre-commit>=4.2.0",
29
+ "pre-commit-hooks>=5.0.0",
30
+ "pytest>=8.3.5",
31
+ "pytest-asyncio>=1.2.0",
32
+ "pytest-cov>=6.1.1",
33
+ "ruff>=0.11.6",
34
+ ]
35
+
36
+ [tool.hatch.version]
37
+ source = "vcs"
38
+
39
+ [tool.hatch.build.hooks.vcs]
40
+ version-file = "src/hctef/__version__.py"
41
+
42
+ [tool.hatch.build.targets.wheel]
43
+ packages = ["src/hctef"]
44
+
45
+ [tool.ruff.format]
46
+ quote-style = 'single'
47
+
48
+ [tool.ruff.lint]
49
+ exclude = [
50
+ 'tests/fixtures/',
51
+ ]
52
+ # https://docs.astral.sh/ruff/rules/
53
+ select = [
54
+ 'B',
55
+ 'BLE',
56
+ 'C4',
57
+ 'C90',
58
+ 'COM',
59
+ 'DTZ',
60
+ 'E',
61
+ 'ERA',
62
+ 'F',
63
+ 'FA',
64
+ 'G',
65
+ 'I',
66
+ 'INP',
67
+ 'N',
68
+ 'NPY',
69
+ 'PT',
70
+ 'PTH',
71
+ 'RET',
72
+ 'RUF',
73
+ 'S',
74
+ 'SIM',
75
+ 'T20',
76
+ 'UP',
77
+ 'W',
78
+
79
+ ]
80
+
81
+ [tool.ruff.lint.per-file-ignores]
82
+ '__init__.py' = ['E402']
83
+ 'tests/**/*' = ['T201', 'S101', 'S603']
84
+
85
+ [tool.ruff.lint.isort]
86
+ lines-between-types = 1
87
+
88
+ [tool.mypy]
89
+ ignore_missing_imports = true
90
+ scripts_are_modules = true
91
+ disable_error_code = 'prop-decorator'
92
+
93
+ [tool.pytest.ini_options]
94
+ addopts="--cov=hctef"
95
+
96
+ [tool.coverage.report]
97
+ show_missing = true
98
+ skip_empty = true
99
+ sort = "Cover"