dongnae 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dongnae/__init__.py ADDED
@@ -0,0 +1,4 @@
1
# Public package interface: re-export the engine class from the engine module.
from .engine import DongnaeEngine

# Package version string.
__version__ = '0.1.0'

# Public alias bound to the built-in dict type.
# NOTE(review): presumably meant as the type of a single dongnae record - confirm.
Dongnae = dict

# Names exported by `from dongnae import *`.
__all__ = ['DongnaeEngine', 'Dongnae', '__version__']
dongnae/engine.py ADDED
@@ -0,0 +1,246 @@
1
+ """
2
+ Key concept of this engine is "dongnae" - an object that has ID(dnid), Name(dnname), 2D coordinates(dnlatitude, dnlongitude), and radius(dnradius).
3
+ Dictionary of dongnaes should be loaded from CSV prior to using this engine.
4
+ """
5
+
6
+ import csv
7
+ import math
8
+ from typing import List, Dict, Optional, Union, TypedDict
9
+
10
# Schema of a single dongnae record.
# total=False makes every key optional, so the dynamic fields ('distance',
# 'score') can be attached to result copies at query time.
class DongnaeData(TypedDict, total=False):
    dnid: str
    dnname: str
    dnlatitude: float
    dnlongitude: float
    dnradius: float
    distance: float  # Injected during runtime
    score: float  # Injected during runtime


class DongnaeEngine:
    """In-memory lookup engine over a list of dongnae records.

    Records are loaded from a CSV file with the columns dnid, dnname,
    dnlatitude, dnlongitude and dnradius (km). Distances are computed with a
    planar (equirectangular) approximation: degree differences are scaled by
    per-axis km-per-degree coefficients that are calibrated once per load().
    """

    def __init__(self, csv_path: Optional[str] = None):
        """Create an engine and, if csv_path is given, load it immediately.

        :param csv_path: Optional path of the CSV file to load.
        :raises ValueError: If csv_path is given but cannot be parsed.
        """
        self._dongnaes: List[DongnaeData] = []
        # dnid -> record index used by get() for O(1) lookup.
        self._id_map: Dict[str, DongnaeData] = {}

        # Default km-per-degree coefficients (based on Korea, approx 37N).
        # load() recalibrates the longitude coefficient from the dataset.
        self._lat_coef = 111.0
        self._lon_coef = 88.8

        # Largest dnradius in the loaded dataset; sizes the bounding boxes.
        self._max_radius = 0.0

        if csv_path:
            self.load(csv_path)

    def load(self, csv_path: str):
        """Load a dongnae CSV into memory, replacing any previous dataset.

        The file is tried with each of several encodings in turn. After a
        successful parse the ID index is rebuilt and, for a non-empty
        dataset, the distance coefficients are recalibrated from the midpoint
        of the dataset's minimum and maximum latitude.

        :param csv_path: Path of the CSV file.
        :raises ValueError: If the file cannot be parsed with any encoding.
            The error raised by the last attempt is chained as __cause__.
        """
        encodings = ['utf-8-sig', 'cp949', 'utf-8']
        records: Optional[List[DongnaeData]] = None
        last_error: Optional[Exception] = None

        for enc in encodings:
            try:
                # newline='' lets the csv module handle line endings itself.
                with open(csv_path, mode='r', encoding=enc, newline='') as f:
                    records = [
                        {
                            'dnid': row['dnid'],
                            'dnname': row['dnname'],
                            'dnlatitude': float(row['dnlatitude']),
                            'dnlongitude': float(row['dnlongitude']),
                            'dnradius': float(row['dnradius']),
                        }
                        for row in csv.DictReader(f)
                    ]
                break
            except (UnicodeDecodeError, KeyError, ValueError, TypeError) as exc:
                # TypeError covers short rows: DictReader fills the missing
                # columns with None and float(None) raises TypeError.
                last_error = exc

        if records is None:
            raise ValueError(
                f"Failed to load CSV: {csv_path}. Tried encodings: {encodings}."
            ) from last_error

        self._dongnaes = records

        # Always rebuild the index (even for an empty dataset) so get() never
        # serves records left over from a previous load. The index references
        # the same dict objects as the list, so nothing is duplicated.
        self._id_map = {d['dnid']: d for d in records}

        if not records:
            self._max_radius = 0.0
            return

        # Auto-calibration: one degree of latitude is ~111 km everywhere,
        # one degree of longitude is ~111 * cos(latitude) km.
        lats = [d['dnlatitude'] for d in records]
        center_lat = (min(lats) + max(lats)) / 2.0
        self._lat_coef = 111.0
        self._lon_coef = round(111.0 * math.cos(math.radians(center_lat)), 2)
        self._max_radius = max(d['dnradius'] for d in records)

    def _calc_dist(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """
        [Geometric Distance] Return the approximate distance in km between
        two points, using the current per-degree coefficients.
        """
        d_lat = (lat2 - lat1) * self._lat_coef
        d_lon = (lon2 - lon1) * self._lon_coef
        return math.sqrt(d_lat**2 + d_lon**2)

    def _dongnae_dist(self, lat: float, lon: float, dn: DongnaeData) -> float:
        """
        [Business Metric] Return the 'Boundary Distance' from a point to a
        dongnae: (distance to its center) - (its radius). The value is
        negative when the point lies inside the radius.
        """
        center_dist = self._calc_dist(lat, lon, dn['dnlatitude'], dn['dnlongitude'])
        return center_dist - dn['dnradius']

    def _bbox_candidates(self, lat: float, lon: float, scan_km: float) -> List[DongnaeData]:
        """Return the records whose center lies in the box of half-width scan_km around the point."""
        lat_delta = scan_km / self._lat_coef
        # A zero longitude coefficient (dataset centered on a pole) cannot
        # bound longitude at all, so that axis is left unbounded.
        lon_delta = scan_km / self._lon_coef if self._lon_coef else math.inf
        lat_lo, lat_hi = lat - lat_delta, lat + lat_delta
        lon_lo, lon_hi = lon - lon_delta, lon + lon_delta
        return [
            dn for dn in self._dongnaes
            if lat_lo <= dn['dnlatitude'] <= lat_hi
            and lon_lo <= dn['dnlongitude'] <= lon_hi
        ]

    def where(self, lat: float, lon: float) -> Optional[DongnaeData]:
        """
        [Reverse Geocoding] Return the single nearest dongnae, or None when
        no data is loaded.
        """
        nearest = self.nearest(lat, lon, k=1)
        return nearest[0] if nearest else None

    def nearest(self, lat: float, lon: float, k: int = 1,
                radius_km: Optional[float] = None) -> List[DongnaeData]:
        """
        Return up to k dongnaes sorted by ascending 'Boundary Distance'.

        :param k: Maximum number of results; k <= 0 yields an empty list.
        :param radius_km: If given, only dongnaes whose boundary distance is
            at most radius_km are returned. If omitted, a bounding box around
            the point is scanned first and the whole dataset is scanned when
            the box holds fewer than k records.
        :return: Copies of the matching records with 'distance' (km, rounded
            to 4 decimals) injected.
        """
        if k <= 0:
            return []

        # 1. Primary filtering (bounding box).
        # The buffer must cover the largest dongnae radius: a record matches
        # when center_distance <= radius_km + dnradius.
        base_km = 10.0 if radius_km is None else radius_km
        candidates = self._bbox_candidates(lat, lon, base_km + max(5.0, self._max_radius))

        # Without an explicit radius the box is only a shortcut; fall back to
        # a full scan whenever it cannot supply k results.
        if radius_km is None and len(candidates) < k:
            candidates = self._dongnaes

        # 2. Boundary distance for every candidate.
        results = []
        for dn in candidates:
            b_dist = self._dongnae_dist(lat, lon, dn)
            if radius_km is None or b_dist <= radius_km:
                # Work on a copy so the stored record is never modified.
                dn_res = dn.copy()
                dn_res['distance'] = round(b_dist, 4)
                results.append(dn_res)

        results.sort(key=lambda x: x['distance'])
        return results[:k]

    def within(self, lat: float, lon: float, radius_km: float,
               limit: Optional[int] = None) -> List[DongnaeData]:
        """
        [Radius Search] Return the dongnaes whose boundary distance is at
        most radius_km, nearest first.

        :param limit: Maximum number of results; None or 0 means no limit.
        """
        return self.nearest(lat, lon, k=limit if limit else len(self._dongnaes), radius_km=radius_km)

    def resolve(self, lat: float, lon: float, threshold: float = 1.0) -> List[DongnaeData]:
        """
        [Soft Geofencing] Return the dongnaes whose effective radius
        (dnradius * threshold) contains the point.

        :param threshold: Radius multiplier; 1.0 is the strict boundary,
            1.2 adds a 20% tolerance.
        :return: Copies of the matching records with 'distance' (boundary
            distance in km) and 'score' (center distance divided by the
            effective radius) injected, sorted by ascending score.
        """
        # A record matches when center_distance <= dnradius * threshold, so
        # the scan box must reach as far as the largest radius allows.
        candidates = self._bbox_candidates(lat, lon, max(15.0, self._max_radius) * threshold)

        matches = []
        for dn in candidates:
            b_dist = self._dongnae_dist(lat, lon, dn)
            if b_dist > dn['dnradius'] * (threshold - 1.0):
                continue

            dn_res = dn.copy()
            dn_res['distance'] = round(b_dist, 4)

            raw_dist = b_dist + dn['dnradius']
            limit_dist = dn['dnradius'] * threshold
            # A zero effective radius only matches a point on the center
            # itself; score it 0.0 instead of dividing by zero.
            dn_res['score'] = round(raw_dist / limit_dist, 2) if limit_dist else 0.0
            matches.append(dn_res)

        matches.sort(key=lambda x: x['score'])
        return matches

    def search(self, keyword: str, limit: int = 5,
               best_shot: bool = True) -> Union[List[DongnaeData], Optional[DongnaeData]]:
        """
        [Text Search & Geocoding] Search by dongnae name.

        The keyword is split on whitespace; a record's score is the number of
        tokens that occur as substrings of its name (case-sensitive). Ties
        are broken in favour of the shorter name.

        :param keyword: Search query (e.g., "Pangyo")
        :param limit: Max number of candidates (only used when best_shot=False)
        :param best_shot: If True, return the single best match or None
            (Geocoding mode). If False, return a list of candidates
            (Search mode).
        :return: Copies of the matching records with 'score' injected.
        """
        query_tokens = keyword.strip().split()
        if not query_tokens:
            return None if best_shot else []

        scored_list = []
        for dn in self._dongnaes:
            name = dn['dnname']
            score = sum(1 for token in query_tokens if token in name)
            if score > 0:
                scored_list.append((score, len(name), dn))

        # Sort: high score first, then short name length (stable for ties).
        scored_list.sort(key=lambda item: (-item[0], item[1]))

        results = []
        for score, _, dn in scored_list[:1] if best_shot else scored_list[:limit]:
            dn_res = dn.copy()
            dn_res['score'] = score
            results.append(dn_res)

        if best_shot:
            return results[0] if results else None
        return results

    def get(self, dnid: str) -> Optional[DongnaeData]:
        """
        [ID Lookup] Return the stored record for dnid (coerced to str), or
        None if it is unknown. Uses the dict index built by load().
        """
        return self._id_map.get(str(dnid))
246
+
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: dongnae
3
+ Version: 0.1.0
4
+ Summary: Ultra lightweight, self contained, Quasi-Geocoding Engine
5
+ Author: nash-dir
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Nash Do It Right
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/nash-dir/dongnae
29
+ Project-URL: Bug Tracker, https://github.com/nash-dir/dongnae/issues
30
+ Keywords: korea,dongnae,geocoding,search,spatial
31
+ Classifier: Development Status :: 4 - Beta
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Operating System :: OS Independent
36
+ Classifier: Topic :: Scientific/Engineering :: GIS
37
+ Requires-Python: >=3.7
38
+ Description-Content-Type: text/markdown
39
+ License-File: LICENSE.txt
40
+ Dynamic: license-file
41
+
42
+ # **dongnae**
43
+
44
+ ## **Ultra lightweight, self contained, Quasi-Geocoding Engine**
45
+
46
+ * **dongnae** is a dependency-free, pure Python library designed for **high-performance reverse geocoding, radius search, and spatial lookups**. It runs from a self-contained native script and a pre-rendered CSV dataset, and is intended for high-performance microservices and client-side applications.
47
+
48
+ * **Zero backend**
49
+ * **Zero auth**
50
+ * **Zero dependencies**
51
+
52
+ * It is optimized for **local/regional datasets** (e.g., Neighborhoods in a specific country) using latitude-based auto-calibration instead of expensive spherical trigonometry for every calculation.
53
+
54
+ ## **Key Features**
55
+
56
+ * **Quasi-Geocoding to "Dongnae"**
57
+
58
+ * Sometimes you just want to lookup which neighborhood you are in.
59
+
60
+ * Instead of precise street-level addresses, it maps coordinates to the nearest "Dongnae" (Neighborhood/District node), which is not quite precise but still good enough for some applications.
61
+
62
+ * Key concept of this engine is **"dongnae"** - an object that has ID(dnid), Name(dnname), 2D coordinates(dnlatitude, dnlongitude), and radius(dnradius).
63
+
64
+ * Dictionary of dongnaes should be loaded from CSV / JS prior to using this engine.
65
+
66
+ * **Zero Dependencies**
67
+
68
+ * **Pure Python**: Runs on pure Python & essential libraries (csv, math). No pip install required for dependencies.
69
+
70
+ * **Ultra lightweight** : Does not require heavy GIS libraries (pandas, geopandas, or shapely)
71
+
72
+ * **Lightning Fast**
73
+
74
+ * **Auto-Calibration**: Calculates Haversine coefficients once upon loading, avoiding repeated trigonometric operations (cos, sin) during queries.
75
+
76
+ * **Spatial Indexing**: Uses dynamic Bounding Box (BBox) filtering to minimize search space.
77
+
78
+ * $O(1)$ **ID Lookup**: Instant retrieval by ID using an internal Hash Map.
79
+
80
+ * **Self-contained**
81
+
82
+ * **Zero backend** : No networking or GIS server required
83
+
84
+ * **Zero dependencies** : Runs on Python standard libraries (csv, math).
85
+
86
+ * **Zero authentication** : No authentication or API key required
87
+
88
+ * **Zero vulnerability** : No external connections means no attack surface. (You can't hack what doesn't quack.)
89
+
90
+ * **Business-Ready Logic**:
91
+ * **Boundary Distance**: Calculates distance from the *edge* of a neighborhood, not just the center.
92
+
93
+ * **Soft Geofencing**: Determines if a point is "roughly" inside a neighborhood with an adjustable threshold.
94
+
95
+ * **Text Search**: Built-in keyword search functionality.
96
+
97
+ * **Privacy by Design** : No Personal Information including Geolocation sent outside.
98
+
99
+
100
+ ## **Getting Started**
101
+
102
+ ### **1\. Prerequisite: Data Format**
103
+
104
+ You need a CSV file containing your local spatial nodes. The file **must** have the following headers:
105
+
106
+ | Column | Type | Description |
107
+ | :---- | :---- | :---- |
108
+ | dnid | String | Unique Identifier (e.g., Zipcode, Legal Code) |
109
+ | dnname | String | Name of the area (e.g., "Gangnam-gu") |
110
+ | dnlatitude | Float | Y Coordinate |
111
+ | dnlongitude | Float | X Coordinate |
112
+ | dnradius | Float | Effective radius of the area (km) |
113
+
114
+ ### **2\. Installation**
115
+
116
+ ``` bash
117
+ pip install dongnae
118
+ ```
119
+
120
+ \# Initialize and load data
121
+ engine \= DongnaeEngine("data.csv")
122
+
123
+ ## **Usage Examples**
124
+
125
+ ### **1\. Reverse Geocoding (where)**
126
+
127
+ Find the nearest neighborhood for a given coordinate.
128
+
129
+ lat, lon \= 37.5665, 126.9780
130
+ result \= engine.where(lat, lon)
131
+
132
+ if result:
133
+ print(f"You are in: {result\['dnname'\]}")
134
+
135
+ ### **2\. K-Nearest Neighbors (nearest)**
136
+
137
+ Find the 3 nearest neighborhoods.
138
+
139
+ \# Get 3 closest nodes within 10km
140
+ neighbors \= engine.nearest(lat, lon, k=3, radius\_km=10.0)
141
+
142
+ for n in neighbors:
143
+ print(f"{n\['dnname'\]} \- {n\['distance'\]}km away")
144
+
145
+ ### **3\. Radius Search (within)**
146
+
147
+ Find all neighborhoods within a 2km radius.
148
+
149
+ nearby\_spots \= engine.within(lat, lon, radius\_km=2.0)
150
+
151
+ ### **4\. Soft Geofencing (resolve)**
152
+
153
+ Determines if a coordinate falls within a neighborhood's effective radius, with an optional tolerance buffer (fuzziness).
154
+
155
+ * threshold=1.0: Strict boundary.
156
+ * threshold=1.2: 20% buffer zone (Loose).
157
+
158
+ \# Check if point is effectively inside the area
159
+ matches \= engine.resolve(lat, lon, threshold=1.2)
160
+
161
+ ### **5\. Text Search (search)**
162
+
163
+ Search by name. Supports "Best Shot" (Geocoding mode) or List return.
164
+
165
+ \# Geocoding Mode (Returns single best match Dict)
166
+ best\_match \= engine.search("Pangyo", best\_shot=True)
167
+
168
+ \# Search Mode (Returns List\[Dict\])
169
+ candidates \= engine.search("Gangnam", best\_shot=False)
170
+
171
+ ### **6\. ID Lookup (get)**
172
+
173
+ Instant lookup by ID ($O(1)$).
174
+
175
+ data \= engine.get("1168010100")
176
+
177
+ ## **API Reference**
178
+
179
+ ### **DongnaeEngine**
180
+
181
+ #### **\_\_init\_\_(csv\_path: str \= None)**
182
+
183
+ Initializes the engine. If csv\_path is provided, it calls load().
184
+
185
+ #### **load(csv\_path: str)**
186
+
187
+ Loads CSV data, detects encoding (utf-8/cp949), builds the ID index, and auto-calculates distance coefficients based on the dataset's center latitude (the midpoint of its minimum and maximum latitudes).
188
+
189
+ #### **where(lat: float, lon: float) \-\> Optional\[DongnaeData\]**
190
+
191
+ Returns the single nearest node. Returns None if no data is loaded.
192
+
193
+ #### **nearest(lat: float, lon: float, k: int \= 1, radius\_km: float \= None) \-\> List\[DongnaeData\]**
194
+
195
+ Returns a list of k nearest nodes sorted by distance.
196
+
197
+ * radius\_km: Optimization parameter. Only searches within this radius (+ buffer).
198
+
199
+ #### **within(lat: float, lon: float, radius\_km: float, limit: int \= None) \-\> List\[DongnaeData\]**
200
+
201
+ Returns all nodes whose boundary distance (distance to the node's center minus its dnradius) is at most radius\_km.
202
+
203
+ #### **resolve(lat: float, lon: float, threshold: float \= 1.0) \-\> List\[DongnaeData\]**
204
+
205
+ Determines spatial inclusion.
206
+
207
+ * Returns nodes where boundary distance \<= radius \* (threshold \- 1.0), i.e. distance to the center \<= radius \* threshold.
208
+ * Useful for checking "Is the user inside this district?".
209
+
210
+ #### **search(keyword: str, limit: int \= 5, best\_shot: bool \= True) \-\> Union\[List\[DongnaeData\], Optional\[DongnaeData\]\]**
211
+
212
+ Performs a text-based search.
213
+
214
+ * **best\_shot=True**: Returns a single DongnaeData object (or None).
215
+ * **best\_shot=False**: Returns a list of candidates sorted by relevance score.
216
+
217
+ #### **get(dnid: str) \-\> Optional\[DongnaeData\]**
218
+
219
+ Retrieves a node by its dnid using a Hash Map ($O(1)$ complexity).
@@ -0,0 +1,7 @@
1
+ dongnae/__init__.py,sha256=PV935BzunWRPNPt4t9wLDaDjQz6HNdDFzP9JO-RePi0,129
2
+ dongnae/engine.py,sha256=8HCh0Dv2CO49P46XKmmezlMBvnUQopmgwUhvFHCYT6g,10215
3
+ dongnae-0.1.0.dist-info/licenses/LICENSE.txt,sha256=KF_u56JS05nXZGmbFQMNyVuWhLC3AptVjHdqUpBcdrg,1073
4
+ dongnae-0.1.0.dist-info/METADATA,sha256=fuXb7h-hlpNKDMrdACkKuEDGULnaMlK7TCr8lxQyr0I,8303
5
+ dongnae-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ dongnae-0.1.0.dist-info/top_level.txt,sha256=xrlgm4jFqEfzkhRkkPd8sBv85Ny615KYRZCkoOnHIQE,8
7
+ dongnae-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Nash Do It Right
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ dongnae