gtfs-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ This project is a CLI tool to fetch, archive, process and explore GTFS data, in particular GTFS-RT (Real-Time).
2
+
3
+ ## Tech stack
4
+
5
+ - Typer for CLI framework, Rich for nice display
6
+ - gtfs-realtime-bindings for protobuf parsing
7
+ - httpx Python library for HTTP connection
8
+ - polars Python library for transformations
9
+ - pyarrow for parquet output
10
+ - uv for Python package and env management
11
+ - pytest for unit tests
12
+
13
+ ## Online resources allowed to fetch
14
+
15
+ You are allowed to fetch from the following domains (full links provided as a helper):
16
+
17
+ https://gtfs.org/documentation/realtime/reference - GTFS RT specification
18
+ https://gtfs.org/documentation/realtime/language-bindings/python/ - GTFS Python bindings docs
19
+ https://typer.tiangolo.com/tutorial/ - Typer CLI docs/tutorial
20
+ https://docs.astral.sh/uv/ - uv Python package manager
21
+ https://docs.pola.rs/api/python - for polars docs
22
+ https://www.python-httpx.org/ - HTTPX docs
23
+
24
+ ## GTFS-RT TTC Toronto feed we are using for testing
25
+
26
+ Base URL: `https://gtfsrt.ttc.ca`
27
+
28
+ ### Available Feeds
29
+
30
+ **Service Alerts**
31
+ - Combined: `/alerts/all?format=binary` (all alerts: subway, bus, streetcar, accessibility, stops)
32
+
33
+ **Trip Updates**
34
+ - Trip Updates: `/trips/update?format=binary`
35
+ - Modified Trip Updates: `/trips/modified_update?format=binary`
36
+
37
+ **Vehicle Positions**
38
+ - Vehicle Positions: `/vehicles/position?format=binary`
39
+
40
+ ### Testing & Development
41
+
42
+ **Primary test feeds:**
43
+ - Trip updates: `https://gtfsrt.ttc.ca/trips/update?format=binary`
44
+ - Vehicle positions: `https://gtfsrt.ttc.ca/vehicles/position?format=binary`
45
+ - All alerts: `https://gtfsrt.ttc.ca/alerts/all?format=binary`
46
+
47
+ **Feed format:** Binary protobuf (use `format=text` for human-readable textproto during debugging)
48
+
49
+ **Sample fetch command for testing:**
50
+ ```bash
51
+ curl https://gtfsrt.ttc.ca/trips/update?format=binary -o test_trip_updates.pb
52
+ ```
53
+
54
+ ## Guidelines on building the CLI
55
+
56
+ - Include usage examples in --help
57
+ - CLI interfaces are an API - version appropriately with semantic versioning
58
+ - Write unit tests whenever you can; make sure not to use network when they are run
59
+
60
+ ## Project structure
61
+
62
+ Use console scripts in pyproject.toml
63
+
64
+ gtfs-cli/
65
+ ├── pyproject.toml # uv managed
66
+ ├── src/
67
+ │ └── gtfs_cli/
68
+ │ ├── __main__.py # Entry point
69
+ │ ├── main.py # Typer app + version
70
+ │ ├── commands/
71
+ │ │ ├── __init__.py
72
+ ├── tests/
73
+ │ └── fixtures/
74
+ │ └── test_abc123.py
75
+ ├── docs/ # contains description for different features for implementation
@@ -0,0 +1,14 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "WebFetch(domain:typer.tiangolo.com)",
5
+ "WebFetch(domain:gtfs.org)",
6
+ "WebFetch(domain:gtfsrt.ttc.ca)",
7
+ "WebFetch(domain:docs.astral.sh)",
8
+ "WebFetch(domain:docs.pola.rs)",
9
+ "WebFetch(domain:python-httpx.org)"
10
+ ],
11
+ "deny": [],
12
+ "ask": []
13
+ }
14
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(uv run pytest:*)",
5
+ "Bash(git checkout *)",
6
+ "Bash(git rm *)",
7
+ "Bash(git commit -m ' *)",
8
+ "Bash(git add *)"
9
+ ]
10
+ }
11
+ }
@@ -0,0 +1,207 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
gtfs_cli-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vladyslav Moisieienkov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: gtfs-cli
3
+ Version: 0.1.0
4
+ Summary: CLI tool to fetch, archive, process and explore GTFS-RT data
5
+ Project-URL: Homepage, https://github.com/VMois/gtfs-cli
6
+ Project-URL: Repository, https://github.com/VMois/gtfs-cli
7
+ Project-URL: Bug Tracker, https://github.com/VMois/gtfs-cli/issues
8
+ Author: Vladyslav Moisieienkov
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: cli,gtfs,gtfs-rt,realtime,transit
12
+ Classifier: Environment :: Console
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: GIS
19
+ Requires-Python: >=3.11
20
+ Requires-Dist: gtfs-realtime-bindings>=1.0.0
21
+ Requires-Dist: httpx>=0.28.0
22
+ Requires-Dist: protobuf>=5.0.0
23
+ Requires-Dist: rich>=13.0.0
24
+ Requires-Dist: typer>=0.15.0
25
+ Description-Content-Type: text/markdown
26
+
27
+ # gtfs-cli
28
+
29
+ CLI tool to fetch, archive, process and explore [GTFS-RT](https://gtfs.org/documentation/realtime/reference/) (General Transit Feed Specification — Realtime) data. GTFS-RT feeds provide live transit information: trip updates, vehicle positions, and service alerts in protobuf format.
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ uv tool install gtfs-cli
35
+ ```
36
+
37
+ After installation, the `gtfs-cli` command is available globally.
38
+
39
+ ## Commands
40
+
41
+ ### `fetch`
42
+
43
+ Fetch a GTFS-RT feed from a URL or local file and output it as JSON.
44
+
45
+ ```bash
46
+ # Fetch live trip updates
47
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary"
48
+
49
+ # Fetch vehicle positions
50
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/vehicles/position?format=binary"
51
+
52
+ # Fetch service alerts
53
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/alerts/all?format=binary"
54
+
55
+ # Inspect a previously saved .pb file
56
+ gtfs-cli fetch trips.pb
57
+ ```
58
+
59
+ **Filtering with jq:**
60
+
61
+ ```bash
62
+ # List all active alerts
63
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/alerts/all?format=binary" | jq '.entity[] | .alert'
64
+
65
+ # Count entities in a feed
66
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary" | jq '.entity | length'
67
+
68
+ # Extract all trip IDs
69
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary" \
70
+ | jq '[.entity[].trip_update.trip.trip_id]'
71
+ ```
72
+
73
+ **Watch mode** — continuously poll a feed and stream NDJSON (one JSON object per line):
74
+
75
+ ```bash
76
+ # Poll every 30 seconds
77
+ gtfs-cli fetch --watch 30 "https://gtfsrt.ttc.ca/trips/update?format=binary"
78
+
79
+ # Count entities on each snapshot
80
+ gtfs-cli fetch --watch 30 "https://gtfsrt.ttc.ca/trips/update?format=binary" \
81
+ | jq --unbuffered '.entity | length'
82
+
83
+ # Save a long-running collection to a file
84
+ gtfs-cli fetch --watch 30 "https://gtfsrt.ttc.ca/trips/update?format=binary" \
85
+ >> snapshots.ndjson
86
+ ```
87
+
88
+ Watch mode handles transient failures gracefully: HTTP and network errors are retried with exponential backoff (1s → 2s → 4s … capped at 60s). Stop with `Ctrl+C` or `SIGTERM`.
89
+
90
+ For all available options, run:
91
+
92
+ ```bash
93
+ gtfs-cli fetch --help
94
+ ```
95
+
96
+ ## Development
97
+
98
+ ```bash
99
+ # Install dependencies
100
+ uv sync
101
+
102
+ # Run a command
103
+ uv run gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary"
104
+
105
+ # Run tests
106
+ uv run pytest tests/ -v
107
+ ```
@@ -0,0 +1,81 @@
1
+ # gtfs-cli
2
+
3
+ CLI tool to fetch, archive, process and explore [GTFS-RT](https://gtfs.org/documentation/realtime/reference/) (General Transit Feed Specification — Realtime) data. GTFS-RT feeds provide live transit information: trip updates, vehicle positions, and service alerts in protobuf format.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ uv tool install gtfs-cli
9
+ ```
10
+
11
+ After installation, the `gtfs-cli` command is available globally.
12
+
13
+ ## Commands
14
+
15
+ ### `fetch`
16
+
17
+ Fetch a GTFS-RT feed from a URL or local file and output it as JSON.
18
+
19
+ ```bash
20
+ # Fetch live trip updates
21
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary"
22
+
23
+ # Fetch vehicle positions
24
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/vehicles/position?format=binary"
25
+
26
+ # Fetch service alerts
27
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/alerts/all?format=binary"
28
+
29
+ # Inspect a previously saved .pb file
30
+ gtfs-cli fetch trips.pb
31
+ ```
32
+
33
+ **Filtering with jq:**
34
+
35
+ ```bash
36
+ # List all active alerts
37
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/alerts/all?format=binary" | jq '.entity[] | .alert'
38
+
39
+ # Count entities in a feed
40
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary" | jq '.entity | length'
41
+
42
+ # Extract all trip IDs
43
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary" \
44
+ | jq '[.entity[].trip_update.trip.trip_id]'
45
+ ```
46
+
47
+ **Watch mode** — continuously poll a feed and stream NDJSON (one JSON object per line):
48
+
49
+ ```bash
50
+ # Poll every 30 seconds
51
+ gtfs-cli fetch --watch 30 "https://gtfsrt.ttc.ca/trips/update?format=binary"
52
+
53
+ # Count entities on each snapshot
54
+ gtfs-cli fetch --watch 30 "https://gtfsrt.ttc.ca/trips/update?format=binary" \
55
+ | jq --unbuffered '.entity | length'
56
+
57
+ # Save a long-running collection to a file
58
+ gtfs-cli fetch --watch 30 "https://gtfsrt.ttc.ca/trips/update?format=binary" \
59
+ >> snapshots.ndjson
60
+ ```
61
+
62
+ Watch mode handles transient failures gracefully: HTTP and network errors are retried with exponential backoff (1s → 2s → 4s … capped at 60s). Stop with `Ctrl+C` or `SIGTERM`.
63
+
64
+ For all available options, run:
65
+
66
+ ```bash
67
+ gtfs-cli fetch --help
68
+ ```
69
+
70
+ ## Development
71
+
72
+ ```bash
73
+ # Install dependencies
74
+ uv sync
75
+
76
+ # Run a command
77
+ uv run gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary"
78
+
79
+ # Run tests
80
+ uv run pytest tests/ -v
81
+ ```
@@ -0,0 +1,116 @@
1
+ # `gtfs-cli fetch` Command Design
2
+
3
+ ## Purpose
4
+
5
+ Load GTFS-RT data from any source (URL or local file) and output it in a human-readable format. This is the single entry point for accessing GTFS-RT data — it handles acquisition and decoding as one cohesive step.
6
+
7
+ ## Usage
8
+
9
+ ```
10
+ gtfs-cli fetch <source> [OPTIONS]
11
+ ```
12
+
13
+ `source` is either an HTTP(S) URL or a local file path. Auto-detected.
14
+
15
+ ## Core Parameters
16
+
17
+ | Parameter | Description |
18
+ |-----------|-------------|
19
+ | `source` (argument) | URL or local file path to a GTFS-RT protobuf |
20
+
21
+ ## Output Behavior
22
+
23
+ - **JSON to stdout by default** — human-readable and machine-parseable
24
+ - Errors and status info go to stderr, never stdout (keeps pipes clean)
25
+ - Exit code 0 on success, non-zero on HTTP/network/parse errors
26
+
27
+ ### Future: Smart Output Detection
28
+
29
+ When stdout is a **terminal**, output JSON (human-readable). When stdout is a **pipe**, output length-delimited protobuf (efficient binary streaming between gtfs-cli commands). `--format` overrides auto-detection. This avoids wasteful proto-to-JSON-to-proto conversion in pipelines while keeping interactive use friendly.
30
+
31
+ ## Source Detection
32
+
33
+ - Starts with `http://` or `https://` — treated as URL, fetched via HTTP
34
+ - Otherwise — treated as a local file path, read from disk
35
+ - This means saved snapshots and live feeds use the exact same interface
36
+
37
+ ## Examples
38
+
39
+ ```bash
40
+ # Quick look at a live feed (JSON output)
41
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/trips/update?format=binary"
42
+
43
+ # Inspect a previously saved file
44
+ gtfs-cli fetch trips.pb
45
+
46
+ # Pipe JSON to jq for quick filtering
47
+ gtfs-cli fetch "https://gtfsrt.ttc.ca/alerts/all?format=binary" | jq '.entity[] | .alert'
48
+ ```
49
+
50
+ ## Use Cases
51
+
52
+ - **Quick inspection**: `gtfs-cli fetch <url>` to see what a feed looks like right now.
53
+ - **Debugging saved files**: same command works on URLs and local files.
54
+ - **Pipeline integration**: JSON output pipes cleanly into jq, scripts, or future gtfs-cli commands.
55
+
56
+ ## IMPLEMENTED
57
+ - `fetch <source>` — URL or local file
58
+ - JSON output to stdout
59
+ - `--timeout` for HTTP sources
60
+ - `--watch <seconds>` with NDJSON streaming, flush-after-write, graceful SIGINT handling
61
+
62
+ ## Watch Mode
63
+
64
+ `--watch <seconds>` enables continuous fetching at a fixed interval. Each fetch produces one complete JSON object, output as **NDJSON (Newline-Delimited JSON)** — one JSON document per line, flushed immediately after each write.
65
+
66
+ ```bash
67
+ # Fetch trip updates every 30 seconds, filter with jq
68
+ gtfs-cli fetch --watch 30 "https://gtfsrt.ttc.ca/trips/update?format=binary" | jq --unbuffered '.entity[]'
69
+ ```
70
+
71
+ ### Why NDJSON
72
+
73
+ - Each line is a self-contained JSON document — consumers parse line by line
74
+ - `jq` handles this natively, no special flags needed to parse (use `--unbuffered` to avoid jq's own buffering)
75
+ - Python consumers: `for line in sys.stdin: data = json.loads(line)`
76
+ - Plain concatenated JSON (`{}{}`) is not valid JSON — NDJSON uses `\n` as a delimiter to avoid this
77
+
78
+ ### Implementation Notes
79
+
80
+ - Flush stdout after each JSON write so data reaches the consumer immediately
81
+ - Graceful SIGINT (Ctrl+C) handling — stop cleanly, exit 0
82
+ - Only applies to URL sources (watching a local file doesn't make sense)
83
+
84
+ ## POTENTIAL FEATURES
85
+ - `--format binary|table` output options
86
+ - `--output` file writing
87
+ - TTY auto-detection for output format (JSON for terminal, length-delimited protobuf for pipes)
88
+
89
+ ### Watch reliability improvements
90
+
91
+ These improvements target long-running `--watch` sessions (hours/days of data collection):
92
+
93
+ - **Persistent HTTP client** — reuse a single `httpx.Client` with connection pooling instead of creating a new connection per request. Reduces TCP/TLS handshake overhead and is kinder to the server.
94
+ - **Exponential backoff on consecutive failures** — on transient errors (timeout, 503), increase sleep between retries (e.g. 1s → 2s → 4s → …, capped). Reset backoff on success. Avoids hammering a struggling server.
95
+ - **SIGTERM handling** — catch `SIGTERM` (via `signal` module) for clean shutdown when run as a systemd service or in Docker. Currently only `KeyboardInterrupt` (SIGINT) is handled.
96
+ - **Drift-corrected sleep** — instead of `time.sleep(interval)` after each fetch (which drifts by fetch duration), compute the next wall-clock target and sleep until that time. Keeps spacing consistent.
97
+
98
+ ### Output durability for long-running collection
99
+
100
+ For serious data collection, NDJSON-to-stdout has limits — a crash mid-write can produce partial lines, and restarts lose context. Two durable storage options:
101
+
102
+ **SQLite (`--output collection.db`)**
103
+ - Append each snapshot as a row with a timestamp. Inherently atomic per transaction — no partial writes.
104
+ - Good for: moderate volume, easy querying (`SELECT * WHERE timestamp > ...`), single-file portability.
105
+ - Schema: `(id INTEGER PRIMARY KEY, fetched_at TEXT, feed_json TEXT)` or normalized tables for entities.
106
+ - Downside: JSON-in-a-column isn't great for columnar analytics. File can grow large without vacuuming.
107
+
108
+ **Parquet (`--output collection.parquet` or `--output-dir snapshots/`)**
109
+ - Flatten the protobuf into columnar format using polars. One row per entity (trip update / vehicle position / alert), with the snapshot timestamp added as a column.
110
+ - Good for: large-scale analytics, efficient compression, direct use with polars/pandas/DuckDB.
111
+ - Two strategies:
112
+ - **Single file with append**: buffer N snapshots in memory, append as a row group periodically. Risk: crash loses the buffer.
113
+ - **Partitioned directory**: write one parquet file per snapshot (or per time window, e.g. hourly). Crash-safe — each file is complete. Use `polars.scan_parquet("snapshots/*.parquet")` to query across them lazily.
114
+ - Partitioned directory is the safer choice for long-running collection. Hourly rotation keeps file count manageable.
115
+
116
+ **Recommendation**: start with partitioned parquet directory — it's crash-safe, works naturally with polars, and avoids the JSON-in-SQLite compromise. A future `gtfs-cli explore` command can `scan_parquet` the directory for analysis.
@@ -0,0 +1,45 @@
1
+ [project]
2
+ name = "gtfs-cli"
3
+ version = "0.1.0"
4
+ description = "CLI tool to fetch, archive, process and explore GTFS-RT data"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.11"
8
+ authors = [{ name = "Vladyslav Moisieienkov" }]
9
+ keywords = ["gtfs", "gtfs-rt", "transit", "cli", "realtime"]
10
+ classifiers = [
11
+ "Programming Language :: Python :: 3",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12",
14
+ "License :: OSI Approved :: MIT License",
15
+ "Operating System :: OS Independent",
16
+ "Environment :: Console",
17
+ "Topic :: Scientific/Engineering :: GIS",
18
+ ]
19
+ dependencies = [
20
+ "typer>=0.15.0",
21
+ "rich>=13.0.0",
22
+ "gtfs-realtime-bindings>=1.0.0",
23
+ "httpx>=0.28.0",
24
+ "protobuf>=5.0.0",
25
+ ]
26
+
27
+ [project.scripts]
28
+ gtfs-cli = "gtfs_cli.main:app"
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/VMois/gtfs-cli"
32
+ Repository = "https://github.com/VMois/gtfs-cli"
33
+ "Bug Tracker" = "https://github.com/VMois/gtfs-cli/issues"
34
+
35
+ [build-system]
36
+ requires = ["hatchling"]
37
+ build-backend = "hatchling.build"
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/gtfs_cli"]
41
+
42
+ [dependency-groups]
43
+ dev = [
44
+ "pytest>=8.0.0",
45
+ ]
File without changes
@@ -0,0 +1,3 @@
1
+ from gtfs_cli.main import app
2
+
3
+ app()
File without changes