pegasource 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pegasource/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ """
2
+ pegasource — Offline-capable Python toolkit.
3
+
4
+ Modules
5
+ -------
6
+ pegasource.pcap PCAP reader, statistics, and anomaly/pattern detection
7
+ pegasource.geo Geographic utilities, coordinate transforms, road graphs
8
+ pegasource.timeseries Simple automatic time-series forecasting
9
+ """
10
+
11
# Release identifier of the package.
__version__ = "0.1.0"

# Sub-packages exposed through ``from pegasource import *``.
__all__ = [
    "pcap",
    "geo",
    "timeseries",
]
@@ -0,0 +1,15 @@
1
+ """
2
+ Placeholder for the Israel road network graph file.
3
+
4
+ Run the following command to download and pre-process the OpenStreetMap
5
+ data for Israel and Palestine (~90 MB download):
6
+
7
+ pegasource-download-roads
8
+
9
+ or:
10
+
11
+ python -m pegasource.geo.israel_roads
12
+
13
+ After running, the file `israel_roads.pkl.gz` will appear in this directory
14
+ and will be loaded automatically by `load_israel_graph()`.
15
+ """
@@ -0,0 +1,36 @@
1
+ """
2
+ pegasource.geo — Geographic utilities: distance, projection, road vectorizer, Israel roads.
3
+
4
+ Quick start::
5
+
6
+ from pegasource.geo import haversine, wgs84_to_itm, load_israel_graph
7
+
8
+ dist_m = haversine(31.7683, 35.2137, 32.0853, 34.7818) # Jerusalem → TLV
9
+ e, n = wgs84_to_itm(31.7683, 35.2137)
10
+ G = load_israel_graph()
11
+ """
12
+
13
+ from .distance import haversine, vincenty, bearing
14
+ from .projection import wgs84_to_itm, itm_to_wgs84, wgs84_to_utm, meters_offset
15
+ from .vectorizer import build_graph, compute_road_coverage, plot_graph_overlay
16
+ from .israel_roads import load_israel_graph, shortest_path, subgraph_bbox
17
+
18
# Public names re-exported by this package, grouped by the submodule that
# defines them (order kept as originally declared).
__all__ = [
    # .distance
    "haversine", "vincenty", "bearing",
    # .projection
    "wgs84_to_itm", "itm_to_wgs84", "wgs84_to_utm", "meters_offset",
    # .vectorizer
    "build_graph", "compute_road_coverage", "plot_graph_overlay",
    # .israel_roads
    "load_israel_graph", "shortest_path", "subgraph_bbox",
]
@@ -0,0 +1,455 @@
1
+ """
2
+ Graph construction from a skeletonised road map.
3
+
4
+ The pipeline:
5
+ 1. Label each skeleton pixel by its 8-connected neighbour count.
6
+ 2. Mark *nodes* — pixels with ≠ 2 neighbours (junctions & endpoints).
7
+ 3. Trace skeleton paths between every pair of adjacent nodes.
8
+ 4. Build and return a weighted ``networkx.Graph``.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from collections import deque
14
+ from typing import Any
15
+
16
+ import networkx as nx
17
+ import numpy as np
18
+ from scipy import ndimage
19
+
20
+ from ._rv_preprocessing import binarize, skeletonize_map
21
+
22
# 8-connectivity structuring element: a 3x3 block of ones.  Convolving a 0/1
# image with it sums each pixel's eight neighbours plus the pixel itself.
_STRUCT_8 = np.ones((3, 3), dtype=int)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _neighbour_count(skeleton: np.ndarray) -> np.ndarray:
    """Return, for every skeleton pixel, how many of its 8 neighbours are set.

    Parameters
    ----------
    skeleton : np.ndarray
        2D skeleton mask.  Any dtype is accepted; non-zero means "on".

    Returns
    -------
    np.ndarray
        Array of the same shape as *skeleton*.  Skeleton pixels hold their
        8-connected neighbour count; every other pixel holds 0.  Pixels
        beyond the image border count as "off".
    """
    # Coerce to bool once.  ``~`` on an integer mask is a bitwise NOT, which
    # would turn the final masking step into integer fancy-indexing instead
    # of a boolean selection.
    on = np.asarray(skeleton).astype(bool)
    skel = on.astype(np.uint8)
    count = ndimage.convolve(skel, _STRUCT_8, mode="constant", cval=0)
    # The kernel includes the centre pixel, so subtract the pixel itself.
    count = count - skel
    # Background pixels are reported as 0 regardless of their surroundings.
    count[~on] = 0
    return count
39
+
40
+
41
def find_nodes(skeleton: np.ndarray) -> list[tuple[int, int]]:
    """Locate the graph nodes (junctions, endpoints, lone pixels) of *skeleton*.

    A skeleton pixel with exactly two skeleton neighbours is an ordinary link
    in a chain.  Every other skeleton pixel becomes a node:

    * 0 neighbours  → isolated pixel
    * 1 neighbour   → endpoint
    * ≥ 3 neighbours → junction / intersection

    Returns
    -------
    list of (row, col)
        Node coordinates as plain Python ints, in row-major scan order.
    """
    neighbour_totals = _neighbour_count(skeleton)
    # Keep only skeleton pixels whose neighbour count is anything but 2.
    is_node = (neighbour_totals != 2) & skeleton
    return [(int(row), int(col)) for row, col in np.argwhere(is_node)]
56
+
57
+
58
def _neighbours_of(r: int, c: int, shape: tuple[int, int]):
    """Yield the in-bounds 8-connected neighbours of ``(r, c)``.

    Neighbours are produced in row-major order (top-left first); positions
    outside ``shape`` are skipped and ``(r, c)`` itself is never yielded.
    """
    for dr in (-1, 0, 1):
        for dc in (-1, 0, 1):
            if dr == 0 and dc == 0:
                continue
            nr, nc = r + dr, c + dc
            if 0 <= nr < shape[0] and 0 <= nc < shape[1]:
                yield nr, nc


def trace_edges(
    skeleton: np.ndarray,
    nodes: list[tuple[int, int]],
    density_map: np.ndarray,
) -> list[dict[str, Any]]:
    """Walk the skeleton from every node and trace edges to adjacent nodes.

    Parameters
    ----------
    skeleton : np.ndarray
        2D skeleton mask; truthy pixels are part of the skeleton.
    nodes : list of (row, col)
        Pixels at which walks start and stop.
    density_map : np.ndarray
        2D array indexed by ``(row, col)``, sampled along every traced path.

    Returns
    -------
    list of dict
        One record per edge, each containing:

        * ``u``, ``v`` — node coordinates ``(row, col)``
        * ``weight`` — mean density along the path
        * ``max_density`` — maximum density along the path
        * ``length`` — number of pixels in the path (both end nodes included)
        * ``path`` — ordered list of ``(row, col)`` for every pixel

    Notes
    -----
    Edges are deduplicated per *unordered endpoint pair*: the first path
    found between two nodes is kept and later ones (the reverse walk, or a
    parallel route between the same two nodes) are dropped.

    A walk that would step onto a pixel it has already visited is abandoned
    and yields no edge.  This cannot happen when *nodes* contains every
    pixel whose neighbour count differs from 2; it only protects against an
    incomplete node list, which would otherwise make the walk circle a loop
    forever.
    """
    node_set = set(nodes)
    shape = skeleton.shape
    visited_edges: set[frozenset] = set()
    edges: list[dict[str, Any]] = []

    # For every node, try to walk each of its skeleton neighbours.
    for node in nodes:
        for first in _neighbours_of(node[0], node[1], shape):
            if not skeleton[first]:
                continue

            # Follow the chain until another node is reached.
            path = [node, first]
            prev, cur = node, first
            walked = {first}

            while cur not in node_set:
                # Next skeleton pixel that is not the one we came from.
                step = next(
                    (
                        cand
                        for cand in _neighbours_of(cur[0], cur[1], shape)
                        if cand != prev and skeleton[cand]
                    ),
                    None,
                )
                # ``None``: dead end that is not a node.  Already walked:
                # we are circling a node-free loop.  Either way, give up.
                if step is None or step in walked:
                    break
                walked.add(step)
                prev, cur = cur, step
                path.append(cur)

            # Abandoned walks leave ``cur`` on a non-node pixel.
            if cur not in node_set:
                continue

            edge_key = frozenset((node, cur))
            if edge_key in visited_edges:
                continue
            visited_edges.add(edge_key)

            densities = np.array([density_map[r, c] for r, c in path])
            edges.append(
                {
                    "u": node,
                    "v": cur,
                    "weight": float(densities.mean()),
                    "max_density": float(densities.max()),
                    "length": len(path),
                    "path": path,
                }
            )

    return edges
140
+
141
+
142
def _cluster_junction_nodes(
    G: nx.Graph,
    density_map: np.ndarray,
    merge_distance: int,
) -> nx.Graph:
    """Collapse groups of spatially-close nodes into one representative each.

    Nodes are grouped by single-linkage hierarchical clustering of their
    Euclidean pixel coordinates, cut at *merge_distance*.  Graph connectivity
    plays no part in the grouping.  With single linkage a node joins a
    cluster when it lies within *merge_distance* of *any* member, so a chain
    of closely spaced nodes ends up in one cluster even if the cluster as a
    whole is wider than *merge_distance*.

    Each cluster is represented by the member nearest to the (rounded)
    centroid.  Edges inside a cluster are dropped; when several edges map to
    the same pair of representatives, the attributes of the one with the
    greatest ``length`` win.

    Parameters
    ----------
    G : nx.Graph
        Graph whose nodes are ``(row, col)`` tuples and whose edges carry a
        ``length`` attribute.
    density_map : np.ndarray
        Not used by this function.
    merge_distance : int
        Distance threshold passed to ``fcluster`` (``criterion="distance"``).

    Returns
    -------
    nx.Graph
        A new graph over the representatives, each with
        ``pos = (col, row)``.  *G* itself is returned when it has fewer than
        two nodes.
    """
    from scipy.cluster.hierarchy import fcluster, linkage

    all_nodes = list(G.nodes())
    if len(all_nodes) < 2:
        return G

    # (N, 2) float matrix of (row, col) positions.
    positions = np.array(all_nodes, dtype=float)
    tree = linkage(positions, method="single", metric="euclidean")
    cluster_ids = fcluster(tree, t=merge_distance, criterion="distance")

    # cluster id -> member nodes, in original node order
    members_by_cluster: dict[int, list[tuple]] = {}
    for member, cid in zip(all_nodes, cluster_ids):
        if cid not in members_by_cluster:
            members_by_cluster[cid] = []
        members_by_cluster[cid].append(member)

    # node -> representative of its cluster (member closest to the centroid)
    representative: dict[tuple, tuple] = {}
    for group in members_by_cluster.values():
        centre_r = int(round(np.mean([rc[0] for rc in group])))
        centre_c = int(round(np.mean([rc[1] for rc in group])))

        def _sq_dist(rc, _r=centre_r, _c=centre_c):
            return (rc[0] - _r) ** 2 + (rc[1] - _c) ** 2

        chosen = min(group, key=_sq_dist)
        for rc in group:
            representative[rc] = chosen

    merged = nx.Graph()
    for rep in set(representative.values()):
        merged.add_node(rep, pos=(rep[1], rep[0]))

    # Re-attach edges between representatives.
    for u, v, attrs in G.edges(data=True):
        rep_u = representative[u]
        rep_v = representative[v]
        if rep_u == rep_v:
            # Both ends fell into the same cluster.
            continue
        if not merged.has_edge(rep_u, rep_v):
            merged.add_edge(rep_u, rep_v, **attrs)
        elif attrs["length"] > merged.edges[rep_u, rep_v]["length"]:
            # Duplicate connection: the longer path replaces the stored one.
            merged.edges[rep_u, rep_v].update(attrs)

    return merged
198
+
199
+
200
def _merge_paths(path_ab: list, path_bc: list) -> list:
    """Join two pixel paths that share an endpoint (the middle node).

    Whichever ends coincide, the result runs from the free end of *path_ab*
    through the shared pixel (kept once) to the free end of *path_bc*; either
    input is reversed as needed.  The inputs are never modified.

    If either path is empty the other is returned as a new list.  If no
    endpoints coincide, the two paths are concatenated as given.
    """
    # An edge stored without pixels contributes nothing; indexing it below
    # would raise IndexError.
    if not path_ab or not path_bc:
        return list(path_ab) + list(path_bc)

    if path_ab[-1] == path_bc[0]:
        return path_ab + path_bc[1:]
    if path_ab[-1] == path_bc[-1]:
        # Reverse path_bc and drop its (shared) last pixel.
        return path_ab + path_bc[-2::-1]
    if path_ab[0] == path_bc[0]:
        return path_ab[::-1] + path_bc[1:]
    if path_ab[0] == path_bc[-1]:
        return path_ab[::-1] + path_bc[-2::-1]
    # Fallback: no common endpoint, concatenate without re-orienting.
    return path_ab + path_bc
212
+
213
+
214
def _contract_degree2_chains(
    G: nx.Graph,
    density_map: np.ndarray,
) -> nx.Graph:
    """Remove degree-2 pass-through nodes, merging their two edges into one.

    For a node with exactly two distinct neighbours ``a`` and ``b``, the
    edges ``node–a`` and ``node–b`` are replaced by a single ``a–b`` edge
    whose ``path`` is the two pixel paths joined and whose ``weight``,
    ``max_density`` and ``length`` are recomputed from *density_map* along
    that path.  Passes repeat until one completes without a contraction.

    A node is left in place when

    * its degree of 2 comes from a self-loop (a single neighbour), or
    * its two neighbours are already joined by an edge.  ``nx.Graph`` holds
      at most one edge per node pair, so contracting would overwrite that
      edge and drop a road (this is also what keeps closed loops from
      collapsing into a single edge).

    Parameters
    ----------
    G : nx.Graph
        Graph with ``(row, col)`` nodes; modified in place.
    density_map : np.ndarray
        2D array indexed by ``(row, col)``.

    Returns
    -------
    nx.Graph
        The same object as *G*.
    """
    changed = True
    while changed:
        changed = False
        # Iterate over a snapshot: nodes are removed while looping.
        for node in list(G.nodes()):
            if node not in G or G.degree(node) != 2:
                continue
            neighbours = list(G.neighbors(node))
            if len(neighbours) != 2:
                continue
            a, b = neighbours
            if G.has_edge(a, b):
                # Contracting would replace the existing a–b edge.
                continue

            path_a = G.edges[node, a].get("path", [])
            path_b = G.edges[node, b].get("path", [])
            new_path = _merge_paths(path_a, path_b)
            densities = np.array([density_map[r, c] for r, c in new_path])

            G.remove_node(node)
            G.add_edge(
                a, b,
                weight=float(densities.mean()),
                max_density=float(densities.max()),
                length=len(new_path),
                path=new_path,
            )
            # Make sure the surviving endpoints carry a plotting position,
            # checking only their own attribute dicts.
            for end in (a, b):
                G.nodes[end].setdefault("pos", (end[1], end[0]))
            changed = True

    return G
251
+
252
+
253
def simplify_graph(
    G: nx.Graph,
    density_map: np.ndarray,
    merge_distance: int = 5,
) -> nx.Graph:
    """Simplify a traced graph in two consecutive passes.

    1. **Cluster** — nodes lying close together (e.g. the several pixels
       that make up one junction) are merged into a single representative
       node by ``_cluster_junction_nodes``.
    2. **Contract** — the remaining degree-2 chain nodes are removed by
       ``_contract_degree2_chains``, fusing their edges into longer ones.

    Parameters
    ----------
    G : nx.Graph
        Raw graph from edge tracing.
    density_map : np.ndarray
        Original density map, used to recompute edge weights.
    merge_distance : int
        Distance threshold (pixels) handed to the clustering pass.

    Returns
    -------
    nx.Graph
        The simplified graph.
    """
    clustered = _cluster_junction_nodes(G, density_map, merge_distance)
    return _contract_degree2_chains(clustered, density_map)
280
+
281
+
282
+ # ---------------------------------------------------------------------------
283
+ # Public API
284
+ # ---------------------------------------------------------------------------
285
+
286
def build_graph(
    density_map: np.ndarray,
    threshold: float | None = None,
    dilate_radius: int = 0,
    prune_length: int = 0,
    merge_distance: int = 5,
) -> nx.Graph:
    """Turn a 2D density histogram into a weighted, undirected road graph.

    Steps: binarize → skeletonize → detect nodes → trace edges → assemble
    the graph → simplify (optional) → prune short spurs (optional).

    Parameters
    ----------
    density_map : np.ndarray
        2D array of density values.
    threshold : float or None
        Forwarded to ``binarize``: pixels above it count as road;
        ``None`` selects the threshold automatically (Otsu).
    dilate_radius : int
        Forwarded to ``skeletonize_map``: optional dilation applied before
        skeletonization (helps connect fragmented roads).
    prune_length : int
        When > 0, after simplification repeatedly remove every edge with
        ``length <= prune_length`` that has at least one degree-1 endpoint,
        until no such edge remains.  Nodes left without edges are removed.
    merge_distance : int
        When > 0, the graph is passed through ``simplify_graph`` with this
        value as the node-clustering distance.  0 skips simplification.

    Returns
    -------
    nx.Graph
        Undirected graph whose nodes are ``(row, col)`` tuples carrying
        ``pos = (col, row)`` (x, y for plotting) and whose edges carry
        ``weight`` (mean density), ``max_density``, ``length`` and ``path``.
    """
    # 1. Road mask and its centre-line skeleton.
    road_mask = binarize(density_map, threshold=threshold)
    skeleton = skeletonize_map(road_mask, dilate_radius=dilate_radius)

    # 2. Junctions / endpoints.
    nodes = find_nodes(skeleton)
    if not nodes and skeleton.any():
        # A skeleton without any node (e.g. one closed loop): seed it with
        # its first pixel so that tracing has somewhere to start.
        seed = np.argwhere(skeleton)[0]
        nodes = [(int(seed[0]), int(seed[1]))]

    # 3. Every edge between adjacent nodes; nothing is pruned yet.
    raw_edges = trace_edges(skeleton, nodes, density_map)

    # 4. Assemble the graph.
    G = nx.Graph()
    G.add_nodes_from((n, {"pos": (n[1], n[0])}) for n in nodes)
    for record in raw_edges:
        attrs = {
            key: record[key]
            for key in ("weight", "max_density", "length", "path")
        }
        G.add_edge(record["u"], record["v"], **attrs)

    # Nodes that ended up with no edge at all are dropped.
    G.remove_nodes_from(list(nx.isolates(G)))

    # 5. Cluster nearby nodes and contract degree-2 chains.
    if merge_distance > 0:
        G = simplify_graph(G, density_map, merge_distance=merge_distance)

    # 6. Spur pruning, after simplification.  Only short edges touching a
    #    dead end (degree 1) are removed, so through-roads stay intact.
    if prune_length > 0:
        pruned_any = True
        while pruned_any:
            pruned_any = False
            for u, v, attrs in list(G.edges(data=True)):
                if not (u in G and v in G):
                    continue
                if attrs["length"] > prune_length:
                    continue
                if 1 in (G.degree(u), G.degree(v)):
                    G.remove_edge(u, v)
                    pruned_any = True
            # Drop nodes the removals have just isolated.
            G.remove_nodes_from(list(nx.isolates(G)))

    return G
379
+
380
+
381
def compute_road_coverage(
    full_graph: nx.Graph,
    partial_graph: nx.Graph,
    tolerance: int = 2,
) -> dict:
    """Compute what fraction of a full road network is present in a partial one.

    For each edge in *full_graph*, the function counts how many of its path
    pixels lie within *tolerance* pixels of any path pixel in
    *partial_graph*.  The overall coverage is the total number of covered
    pixels divided by the total number of path pixels, i.e. a
    length-weighted fraction of matched road.

    Parameters
    ----------
    full_graph : nx.Graph
        The reference graph.  Edges should carry a ``path`` attribute: a
        sequence of ``(row, col)`` pairs (tuples, lists or arrays).
    partial_graph : nx.Graph
        The graph to compare against the reference.  Edges should carry a
        ``path`` attribute of the same form.
    tolerance : int
        A full-graph pixel is considered "covered" if any partial-graph
        pixel is within this many pixels (Chebyshev / L∞ distance).
        Use 0 for exact pixel match, 1–3 for fuzzy matching that
        tolerates slight skeleton shifts between the two maps.

    Returns
    -------
    dict
        ``"coverage"`` — float in [0, 1]; ``0.0`` when *full_graph* has no
        path pixels at all.

        ``"edges"`` — list of dicts, one per full-graph edge that has a
        non-empty path, each containing ``"u"``, ``"v"``, ``"length"``,
        ``"covered_pixels"`` and ``"edge_coverage"`` (fraction for that
        edge).
    """
    # Every partial-graph pixel, grown by a (2*tolerance + 1)-wide square so
    # that the fuzzy match below is a plain set lookup.
    offsets = range(-tolerance, tolerance + 1)
    partial_pixels: set[tuple[int, int]] = set()
    for _, _, data in partial_graph.edges(data=True):
        for r, c in data.get("path", []):
            for dr in offsets:
                for dc in offsets:
                    partial_pixels.add((r + dr, c + dc))

    total_pixels = 0
    covered_pixels = 0
    edge_details = []

    for u, v, data in full_graph.edges(data=True):
        path = data.get("path", [])
        n_total = len(path)
        if n_total == 0:
            # Nothing to measure on an edge without pixels.
            continue

        # tuple(): pixels stored as lists/arrays are unhashable as-is.
        n_covered = sum(1 for p in path if tuple(p) in partial_pixels)

        total_pixels += n_total
        covered_pixels += n_covered

        edge_details.append({
            "u": u,
            "v": v,
            "length": n_total,
            "covered_pixels": n_covered,
            "edge_coverage": n_covered / n_total,
        })

    overall = covered_pixels / total_pixels if total_pixels > 0 else 0.0

    return {
        "coverage": overall,
        "edges": edge_details,
    }
455
+
@@ -0,0 +1,62 @@
1
+ """
2
+ Preprocessing utilities: thresholding and skeletonization of 2D density histograms.
3
+ """
4
+
5
+ import numpy as np
6
+ from skimage.filters import threshold_otsu
7
+ from skimage.morphology import skeletonize, binary_dilation, disk
8
+
9
+
10
def binarize(histogram: np.ndarray, threshold: float | None = None) -> np.ndarray:
    """Convert a 2D density histogram to a binary road mask.

    Parameters
    ----------
    histogram : np.ndarray
        2D array (or array-like, e.g. nested lists) of density values.
    threshold : float or None
        Pixel values **above** this threshold are considered road.
        If ``None``, Otsu's method is used to determine the threshold
        automatically.

    Returns
    -------
    np.ndarray
        Boolean 2D array — ``True`` where roads are detected.  With
        ``threshold=None``, an empty or constant-valued histogram yields an
        all-``False`` mask.

    Raises
    ------
    ValueError
        If *histogram* is not 2-dimensional.
    """
    histogram = np.asanyarray(histogram)
    if histogram.ndim != 2:
        raise ValueError(f"Expected a 2D array, got {histogram.ndim}D.")

    hist = histogram.astype(np.float64)

    if threshold is None:
        # Otsu needs at least two distinct values.  The size check comes
        # first because max()/min() raise on a zero-size array.
        if hist.size == 0 or hist.max() == hist.min():
            return np.zeros_like(hist, dtype=bool)
        threshold = threshold_otsu(hist)

    return hist > threshold
39
+
40
+
41
def skeletonize_map(binary_mask: np.ndarray, dilate_radius: int = 0) -> np.ndarray:
    """Reduce a binary road mask to its skeleton.

    Parameters
    ----------
    binary_mask : np.ndarray
        2D road mask (e.g. output of :func:`binarize`); it is cast to bool
        before processing.
    dilate_radius : int
        When > 0, the mask is first dilated with a disk of this radius,
        which helps when road regions are very thin and fragmented.
        Values ≤ 0 skip the dilation.

    Returns
    -------
    np.ndarray
        The result of ``skimage.morphology.skeletonize`` on the (optionally
        dilated) mask.
    """
    road = binary_mask.astype(bool)
    thickened = (
        binary_dilation(road, disk(dilate_radius)) if dilate_radius > 0 else road
    )
    return skeletonize(thickened)
+ return skeletonize(mask)