pegasource 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. pegasource-0.1.0/PKG-INFO +215 -0
  2. pegasource-0.1.0/README.md +180 -0
  3. pegasource-0.1.0/pegasource/__init__.py +12 -0
  4. pegasource-0.1.0/pegasource/data/README.md +15 -0
  5. pegasource-0.1.0/pegasource/geo/__init__.py +36 -0
  6. pegasource-0.1.0/pegasource/geo/_rv_graph_builder.py +455 -0
  7. pegasource-0.1.0/pegasource/geo/_rv_preprocessing.py +62 -0
  8. pegasource-0.1.0/pegasource/geo/_rv_visualization.py +140 -0
  9. pegasource-0.1.0/pegasource/geo/distance.py +156 -0
  10. pegasource-0.1.0/pegasource/geo/israel_roads.py +396 -0
  11. pegasource-0.1.0/pegasource/geo/projection.py +148 -0
  12. pegasource-0.1.0/pegasource/geo/vectorizer.py +21 -0
  13. pegasource-0.1.0/pegasource/pcap/__init__.py +36 -0
  14. pegasource-0.1.0/pegasource/pcap/patterns.py +402 -0
  15. pegasource-0.1.0/pegasource/pcap/reader.py +118 -0
  16. pegasource-0.1.0/pegasource/pcap/report.py +103 -0
  17. pegasource-0.1.0/pegasource/pcap/stats.py +152 -0
  18. pegasource-0.1.0/pegasource/timeseries/__init__.py +30 -0
  19. pegasource-0.1.0/pegasource/timeseries/auto.py +256 -0
  20. pegasource-0.1.0/pegasource/timeseries/models.py +186 -0
  21. pegasource-0.1.0/pegasource/timeseries/utils.py +98 -0
  22. pegasource-0.1.0/pegasource.egg-info/PKG-INFO +215 -0
  23. pegasource-0.1.0/pegasource.egg-info/SOURCES.txt +30 -0
  24. pegasource-0.1.0/pegasource.egg-info/dependency_links.txt +1 -0
  25. pegasource-0.1.0/pegasource.egg-info/entry_points.txt +2 -0
  26. pegasource-0.1.0/pegasource.egg-info/requires.txt +18 -0
  27. pegasource-0.1.0/pegasource.egg-info/top_level.txt +1 -0
  28. pegasource-0.1.0/pyproject.toml +64 -0
  29. pegasource-0.1.0/setup.cfg +4 -0
  30. pegasource-0.1.0/tests/test_geo.py +183 -0
  31. pegasource-0.1.0/tests/test_pcap.py +228 -0
  32. pegasource-0.1.0/tests/test_timeseries.py +202 -0
@@ -0,0 +1,215 @@
1
+ Metadata-Version: 2.4
2
+ Name: pegasource
3
+ Version: 0.1.0
4
+ Summary: Offline-capable toolkit: PCAP analysis, geographic functions, and time-series prediction
5
+ Author: Josef Berman
6
+ License: MIT
7
+ Keywords: pcap,network,geo,timeseries,israel,roads
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ Requires-Dist: scapy>=2.5
19
+ Requires-Dist: numpy>=1.24
20
+ Requires-Dist: scipy>=1.11
21
+ Requires-Dist: scikit-image>=0.21
22
+ Requires-Dist: networkx>=3.1
23
+ Requires-Dist: pyproj>=3.6
24
+ Requires-Dist: shapely>=2.0
25
+ Requires-Dist: statsmodels>=0.14
26
+ Requires-Dist: scikit-learn>=1.3
27
+ Requires-Dist: pandas>=2.0
28
+ Requires-Dist: matplotlib>=3.7
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=7.4; extra == "dev"
31
+ Requires-Dist: pytest-cov; extra == "dev"
32
+ Requires-Dist: mkdocs<2; extra == "dev"
33
+ Requires-Dist: mkdocs-material; extra == "dev"
34
+ Requires-Dist: mkdocstrings[python]; extra == "dev"
35
+
36
+ # Pegasource
37
+
38
+ > **Offline-capable Python toolkit** — PCAP analysis, geographic functions, and automatic time-series forecasting.
39
+
40
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org)
41
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE)
42
+
43
+ ---
44
+
45
+ ## Modules
46
+
47
+ | Module | Description |
48
+ |--------|-------------|
49
+ | `pegasource.pcap` | PCAP reader, statistics, anomaly & pattern detection |
50
+ | `pegasource.geo` | Distance, coordinate transforms, road vectorizer, Israel road network |
51
+ | `pegasource.timeseries` | Automatic time-series forecasting (SARIMAX + fallback) |
52
+
53
+ ---
54
+
55
+ ## Installation
56
+
57
+ ```bash
58
+ pip install -e ".[dev]" # development install
59
+ # or
60
+ pip install pegasource # once published
61
+ ```
62
+
63
+ > **scapy** requires root privileges to capture live traffic, but reading PCAP files works without root.
64
+
65
+ ---
66
+
67
+ ## Quick Start
68
+
69
+ ### PCAP Analysis
70
+
71
+ ```python
72
+ from pegasource.pcap import read_pcap, generate_report
73
+
74
+ packets = read_pcap("capture.pcap")
75
+ report = generate_report(packets, output_path="report.json")
76
+
77
+ # Individual detectors
78
+ from pegasource.pcap import detect_port_scan, detect_beaconing, detect_dns_anomalies
79
+
80
+ scans = detect_port_scan(packets, threshold=20)
81
+ beacons = detect_beaconing(packets, min_occurrences=5)
82
+ dns_anom = detect_dns_anomalies(packets)
83
+ ```
84
+
85
+ ### Geographic Functions
86
+
87
+ ```python
88
+ from pegasource.geo import haversine, wgs84_to_itm, load_israel_graph, shortest_path
89
+
90
+ # Distances
91
+ dist_m = haversine(31.7683, 35.2137, 32.0853, 34.7818) # Jerusalem → TLV ≈ 54 km
92
+ print(f"Distance: {dist_m / 1000:.1f} km")
93
+
94
+ # Coordinate conversion
95
+ easting, northing = wgs84_to_itm(31.7683, 35.2137) # WGS84 → ITM (EPSG:2039)
96
+
97
+ # Israel road network
98
+ G = load_israel_graph() # loads the pre-processed graph (created by the one-time download below)
99
+ route = shortest_path(G, (31.7683, 35.2137), (32.0853, 34.7818))
100
+ print("First waypoint:", route[0])
101
+ ```
102
+
103
+ #### Downloading the road graph (one-time, ~90 MB)
104
+
105
+ ```bash
106
+ pegasource-download-roads
107
+ # or
108
+ python -m pegasource.geo.israel_roads
109
+ ```
110
+
111
+ This command downloads the Geofabrik `israel-and-palestine-latest.osm.pbf` extract and saves a
112
+ pre-processed `israel_roads.pkl.gz` to `pegasource/data/`.
113
+
114
+ ### Road Vectorizer (from density maps)
115
+
116
+ ```python
117
+ import numpy as np
118
+ from pegasource.geo import build_graph, plot_graph_overlay
119
+
120
+ density = np.load("my_density_map.npy")
121
+ G = build_graph(density, threshold=0.3, prune_length=5)
122
+ plot_graph_overlay(density, G)
123
+ ```
124
+
125
+ The API mirrors [josefberman/RoadVectorizer](https://github.com/josefberman/RoadVectorizer):
126
+
127
+ | Function | Description |
128
+ |----------|-------------|
129
+ | `build_graph(density_map, **kwargs)` | Convert 2D density histogram → `nx.Graph` |
130
+ | `compute_road_coverage(full_graph, partial_graph, tolerance=2)` | Coverage fraction |
131
+ | `plot_graph_overlay(density_map, graph, **kwargs)` | Matplotlib overlay |
132
+
133
+ ### Time-Series Prediction
134
+
135
+ ```python
136
+ import numpy as np
137
+ from pegasource.timeseries import AutoForecaster
138
+
139
+ # Univariate
140
+ y = np.sin(np.linspace(0, 8 * np.pi, 96)) + np.random.randn(96) * 0.1
141
+ fc = AutoForecaster()
142
+ fc.fit(y)
143
+ pred = fc.predict(steps=12)
144
+ print(fc.diagnostics())
145
+ fc.plot(steps=12)
146
+
147
+ # With exogenous variables
148
+ import pandas as pd
149
+ exog = pd.DataFrame({"temperature": ..., "holiday": ...}) # shape (n, k)
150
+ fc2 = AutoForecaster()
151
+ fc2.fit(y, exog=exog)
152
+ pred2 = fc2.predict(steps=6, exog=exog_future) # exog_future: exogenous values for the 6 forecast steps (same columns as exog)
153
+ ```
154
+
155
+ `AutoForecaster` automatically:
156
+ 1. Detects the dominant seasonal period via ACF
157
+ 2. Tries multiple SARIMAX configurations and selects by AIC
158
+ 3. Falls back to OLS linear trend + seasonal dummies if SARIMAX fails
159
+
160
+ ---
161
+
162
+ ## Running Tests
163
+
164
+ ```bash
165
+ pip install -e ".[dev]"
166
+ pytest tests/ -v
167
+ ```
168
+
169
+ ---
170
+
171
+ ## Package Structure
172
+
173
+ ```
174
+ pegasource/
175
+ ├── pcap/
176
+ │ ├── reader.py # read_pcap, packet_summary
177
+ │ ├── stats.py # protocol_distribution, top_talkers, conversation_table
178
+ │ ├── patterns.py # port scan, beaconing, DNS anomalies, …
179
+ │ └── report.py # generate_report
180
+ ├── geo/
181
+ │ ├── distance.py # haversine, vincenty, bearing
182
+ │ ├── projection.py # wgs84_to_itm, itm_to_wgs84, wgs84_to_utm, meters_offset
183
+ │ ├── vectorizer.py # build_graph, compute_road_coverage, plot_graph_overlay
184
+ │ ├── israel_roads.py # load_israel_graph, shortest_path, subgraph_bbox
185
+ │ └── _rv_*.py # vendored RoadVectorizer source (josefberman)
186
+ ├── timeseries/
187
+ │ ├── auto.py # AutoForecaster
188
+ │ ├── models.py # SARIMAXModel, LinearTrendModel
189
+ │ └── utils.py # detect_seasonality, train_test_split_ts, rmse
190
+ └── data/
191
+ └── israel_roads.pkl.gz # pre-processed OSM graph (after download)
192
+ ```
193
+
194
+ ---
195
+
196
+ ## Dependencies
197
+
198
+ - **scapy** — PCAP parsing
199
+ - **numpy, scipy, scikit-image** — numerical + image processing
200
+ - **networkx** — road graphs
201
+ - **pyproj** — coordinate transforms
202
+ - **shapely** — geometric operations
203
+ - **statsmodels** — SARIMAX
204
+ - **scikit-learn** — feature engineering
205
+ - **pandas** — data manipulation
206
+ - **matplotlib** — visualisation
207
+
208
+ ---
209
+
210
+ ## License
211
+
212
+ MIT © Josef Berman
213
+
214
+ Road Vectorizer code adapted from [josefberman/RoadVectorizer](https://github.com/josefberman/RoadVectorizer) (MIT).
215
+ Road data © OpenStreetMap contributors (ODbL).
@@ -0,0 +1,180 @@
1
+ # Pegasource
2
+
3
+ > **Offline-capable Python toolkit** — PCAP analysis, geographic functions, and automatic time-series forecasting.
4
+
5
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org)
6
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE)
7
+
8
+ ---
9
+
10
+ ## Modules
11
+
12
+ | Module | Description |
13
+ |--------|-------------|
14
+ | `pegasource.pcap` | PCAP reader, statistics, anomaly & pattern detection |
15
+ | `pegasource.geo` | Distance, coordinate transforms, road vectorizer, Israel road network |
16
+ | `pegasource.timeseries` | Automatic time-series forecasting (SARIMAX + fallback) |
17
+
18
+ ---
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install -e ".[dev]" # development install
24
+ # or
25
+ pip install pegasource # once published
26
+ ```
27
+
28
+ > **scapy** requires root privileges to capture live traffic, but reading PCAP files works without root.
29
+
30
+ ---
31
+
32
+ ## Quick Start
33
+
34
+ ### PCAP Analysis
35
+
36
+ ```python
37
+ from pegasource.pcap import read_pcap, generate_report
38
+
39
+ packets = read_pcap("capture.pcap")
40
+ report = generate_report(packets, output_path="report.json")
41
+
42
+ # Individual detectors
43
+ from pegasource.pcap import detect_port_scan, detect_beaconing, detect_dns_anomalies
44
+
45
+ scans = detect_port_scan(packets, threshold=20)
46
+ beacons = detect_beaconing(packets, min_occurrences=5)
47
+ dns_anom = detect_dns_anomalies(packets)
48
+ ```
49
+
50
+ ### Geographic Functions
51
+
52
+ ```python
53
+ from pegasource.geo import haversine, wgs84_to_itm, load_israel_graph, shortest_path
54
+
55
+ # Distances
56
+ dist_m = haversine(31.7683, 35.2137, 32.0853, 34.7818) # Jerusalem → TLV ≈ 54 km
57
+ print(f"Distance: {dist_m / 1000:.1f} km")
58
+
59
+ # Coordinate conversion
60
+ easting, northing = wgs84_to_itm(31.7683, 35.2137) # WGS84 → ITM (EPSG:2039)
61
+
62
+ # Israel road network
63
+ G = load_israel_graph() # loads the pre-processed graph (created by the one-time download below)
64
+ route = shortest_path(G, (31.7683, 35.2137), (32.0853, 34.7818))
65
+ print("First waypoint:", route[0])
66
+ ```
67
+
68
+ #### Downloading the road graph (one-time, ~90 MB)
69
+
70
+ ```bash
71
+ pegasource-download-roads
72
+ # or
73
+ python -m pegasource.geo.israel_roads
74
+ ```
75
+
76
+ This command downloads the Geofabrik `israel-and-palestine-latest.osm.pbf` extract and saves a
77
+ pre-processed `israel_roads.pkl.gz` to `pegasource/data/`.
78
+
79
+ ### Road Vectorizer (from density maps)
80
+
81
+ ```python
82
+ import numpy as np
83
+ from pegasource.geo import build_graph, plot_graph_overlay
84
+
85
+ density = np.load("my_density_map.npy")
86
+ G = build_graph(density, threshold=0.3, prune_length=5)
87
+ plot_graph_overlay(density, G)
88
+ ```
89
+
90
+ The API mirrors [josefberman/RoadVectorizer](https://github.com/josefberman/RoadVectorizer):
91
+
92
+ | Function | Description |
93
+ |----------|-------------|
94
+ | `build_graph(density_map, **kwargs)` | Convert 2D density histogram → `nx.Graph` |
95
+ | `compute_road_coverage(full_graph, partial_graph, tolerance=2)` | Coverage fraction |
96
+ | `plot_graph_overlay(density_map, graph, **kwargs)` | Matplotlib overlay |
97
+
98
+ ### Time-Series Prediction
99
+
100
+ ```python
101
+ import numpy as np
102
+ from pegasource.timeseries import AutoForecaster
103
+
104
+ # Univariate
105
+ y = np.sin(np.linspace(0, 8 * np.pi, 96)) + np.random.randn(96) * 0.1
106
+ fc = AutoForecaster()
107
+ fc.fit(y)
108
+ pred = fc.predict(steps=12)
109
+ print(fc.diagnostics())
110
+ fc.plot(steps=12)
111
+
112
+ # With exogenous variables
113
+ import pandas as pd
114
+ exog = pd.DataFrame({"temperature": ..., "holiday": ...}) # shape (n, k)
115
+ fc2 = AutoForecaster()
116
+ fc2.fit(y, exog=exog)
117
+ pred2 = fc2.predict(steps=6, exog=exog_future) # exog_future: exogenous values for the 6 forecast steps (same columns as exog)
118
+ ```
119
+
120
+ `AutoForecaster` automatically:
121
+ 1. Detects the dominant seasonal period via ACF
122
+ 2. Tries multiple SARIMAX configurations and selects by AIC
123
+ 3. Falls back to OLS linear trend + seasonal dummies if SARIMAX fails
124
+
125
+ ---
126
+
127
+ ## Running Tests
128
+
129
+ ```bash
130
+ pip install -e ".[dev]"
131
+ pytest tests/ -v
132
+ ```
133
+
134
+ ---
135
+
136
+ ## Package Structure
137
+
138
+ ```
139
+ pegasource/
140
+ ├── pcap/
141
+ │ ├── reader.py # read_pcap, packet_summary
142
+ │ ├── stats.py # protocol_distribution, top_talkers, conversation_table
143
+ │ ├── patterns.py # port scan, beaconing, DNS anomalies, …
144
+ │ └── report.py # generate_report
145
+ ├── geo/
146
+ │ ├── distance.py # haversine, vincenty, bearing
147
+ │ ├── projection.py # wgs84_to_itm, itm_to_wgs84, wgs84_to_utm, meters_offset
148
+ │ ├── vectorizer.py # build_graph, compute_road_coverage, plot_graph_overlay
149
+ │ ├── israel_roads.py # load_israel_graph, shortest_path, subgraph_bbox
150
+ │ └── _rv_*.py # vendored RoadVectorizer source (josefberman)
151
+ ├── timeseries/
152
+ │ ├── auto.py # AutoForecaster
153
+ │ ├── models.py # SARIMAXModel, LinearTrendModel
154
+ │ └── utils.py # detect_seasonality, train_test_split_ts, rmse
155
+ └── data/
156
+ └── israel_roads.pkl.gz # pre-processed OSM graph (after download)
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Dependencies
162
+
163
+ - **scapy** — PCAP parsing
164
+ - **numpy, scipy, scikit-image** — numerical + image processing
165
+ - **networkx** — road graphs
166
+ - **pyproj** — coordinate transforms
167
+ - **shapely** — geometric operations
168
+ - **statsmodels** — SARIMAX
169
+ - **scikit-learn** — feature engineering
170
+ - **pandas** — data manipulation
171
+ - **matplotlib** — visualisation
172
+
173
+ ---
174
+
175
+ ## License
176
+
177
+ MIT © Josef Berman
178
+
179
+ Road Vectorizer code adapted from [josefberman/RoadVectorizer](https://github.com/josefberman/RoadVectorizer) (MIT).
180
+ Road data © OpenStreetMap contributors (ODbL).
@@ -0,0 +1,12 @@
1
+ """
2
+ pegasource — Offline-capable Python toolkit.
3
+
4
+ Modules
5
+ -------
6
+ pegasource.pcap PCAP reader, statistics, and anomaly/pattern detection
7
+ pegasource.geo Geographic utilities, coordinate transforms, road graphs
8
+ pegasource.timeseries Simple automatic time-series forecasting
9
+ """
10
+
11
+ __version__ = "0.1.0"
12
+ __all__ = ["pcap", "geo", "timeseries"]
@@ -0,0 +1,15 @@
1
+ """
2
+ Placeholder for the Israel road network graph file.
3
+
4
+ Run the following command to download and pre-process the OpenStreetMap
5
+ data for Israel and Palestine (~90 MB download):
6
+
7
+ pegasource-download-roads
8
+
9
+ or:
10
+
11
+ python -m pegasource.geo.israel_roads
12
+
13
+ After running, the file `israel_roads.pkl.gz` will appear in this directory
14
+ and will be loaded automatically by `load_israel_graph()`.
15
+ """
@@ -0,0 +1,36 @@
1
+ """
2
+ pegasource.geo — Geographic utilities: distance, projection, road vectorizer, Israel roads.
3
+
4
+ Quick start::
5
+
6
+ from pegasource.geo import haversine, wgs84_to_itm, load_israel_graph
7
+
8
+ dist_m = haversine(31.7683, 35.2137, 32.0853, 34.7818) # Jerusalem → TLV
9
+ e, n = wgs84_to_itm(31.7683, 35.2137)
10
+ G = load_israel_graph()
11
+ """
12
+
13
+ from .distance import haversine, vincenty, bearing
14
+ from .projection import wgs84_to_itm, itm_to_wgs84, wgs84_to_utm, meters_offset
15
+ from .vectorizer import build_graph, compute_road_coverage, plot_graph_overlay
16
+ from .israel_roads import load_israel_graph, shortest_path, subgraph_bbox
17
+
18
+ __all__ = [
19
+ # distance
20
+ "haversine",
21
+ "vincenty",
22
+ "bearing",
23
+ # projection
24
+ "wgs84_to_itm",
25
+ "itm_to_wgs84",
26
+ "wgs84_to_utm",
27
+ "meters_offset",
28
+ # vectorizer
29
+ "build_graph",
30
+ "compute_road_coverage",
31
+ "plot_graph_overlay",
32
+ # israel roads
33
+ "load_israel_graph",
34
+ "shortest_path",
35
+ "subgraph_bbox",
36
+ ]