csv-detective 0.9.3.dev2140__py3-none-any.whl → 0.9.3.dev2215__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detect_labels/FR/other/siren/__init__.py +1 -0
- csv_detective/detect_labels/FR/other/siret/__init__.py +1 -0
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +22 -29
- csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +15 -7
- csv_detective/detection/formats.py +3 -0
- {csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/METADATA +17 -24
- {csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/RECORD +11 -11
- {csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/top_level.txt +0 -0
|
@@ -3,44 +3,37 @@ from csv_detective.parsing.text import header_score
|
|
|
3
3
|
PROPORTION = 0.5
|
|
4
4
|
|
|
5
5
|
COMMON_COORDS_LABELS = [
|
|
6
|
-
"
|
|
7
|
-
"
|
|
8
|
-
"coord gps",
|
|
6
|
+
"ban",
|
|
7
|
+
"coordinates",
|
|
9
8
|
"coordonnees",
|
|
10
|
-
"coordonnees ban",
|
|
11
|
-
"coordonnees finales",
|
|
12
|
-
"coordonnees geo",
|
|
13
|
-
"coordonnees geographiques",
|
|
14
|
-
"coordonnees geoloc",
|
|
15
|
-
"coordonnees geoloc",
|
|
16
|
-
"coordonnees gps",
|
|
17
9
|
"coordonnees insee",
|
|
18
|
-
"coordonnees xy",
|
|
19
10
|
"geo",
|
|
20
|
-
"geo coordinates",
|
|
21
|
-
"geo cp",
|
|
22
|
-
"geo localisation",
|
|
23
|
-
"geo point",
|
|
24
|
-
"geo point 2d",
|
|
25
|
-
"geolocalisation",
|
|
26
|
-
"geom x y",
|
|
27
|
-
"geometry x y",
|
|
28
11
|
"geopoint",
|
|
29
|
-
"
|
|
30
|
-
"
|
|
12
|
+
"geoloc",
|
|
13
|
+
"geolocalisation",
|
|
14
|
+
"geom",
|
|
15
|
+
"geometry",
|
|
16
|
+
"gps",
|
|
17
|
+
"localisation",
|
|
18
|
+
"point",
|
|
31
19
|
"position",
|
|
32
|
-
"position geographique",
|
|
33
20
|
"wgs84",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
specific = [
|
|
24
|
+
"latlon",
|
|
25
|
+
"lat lon",
|
|
34
26
|
"x y",
|
|
35
27
|
"xy",
|
|
36
28
|
]
|
|
37
29
|
|
|
30
|
+
# we aim wide to catch exact matches if possible for the highest possible score
|
|
31
|
+
words = (
|
|
32
|
+
COMMON_COORDS_LABELS
|
|
33
|
+
+ specific
|
|
34
|
+
+ [w + sep + suf for suf in specific for w in COMMON_COORDS_LABELS for sep in ["", " "]]
|
|
35
|
+
)
|
|
36
|
+
|
|
38
37
|
|
|
39
38
|
def _is(header: str) -> float:
|
|
40
|
-
|
|
41
|
-
"latlon wgs",
|
|
42
|
-
"latlon",
|
|
43
|
-
"latlong",
|
|
44
|
-
"lat lon",
|
|
45
|
-
] + COMMON_COORDS_LABELS
|
|
46
|
-
return header_score(header, words_combinations_list)
|
|
39
|
+
return header_score(header, words)
|
|
@@ -4,12 +4,20 @@ from ..latlon_wgs import COMMON_COORDS_LABELS
|
|
|
4
4
|
|
|
5
5
|
PROPORTION = 0.5
|
|
6
6
|
|
|
7
|
+
specific = [
|
|
8
|
+
"lonlat",
|
|
9
|
+
"lon lat",
|
|
10
|
+
"y x",
|
|
11
|
+
"yx",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
# we aim wide to catch exact matches if possible for the highest possible score
|
|
15
|
+
words = (
|
|
16
|
+
COMMON_COORDS_LABELS
|
|
17
|
+
+ specific
|
|
18
|
+
+ [w + sep + suf for suf in specific for w in COMMON_COORDS_LABELS for sep in ["", " "]]
|
|
19
|
+
)
|
|
20
|
+
|
|
7
21
|
|
|
8
22
|
def _is(header: str) -> float:
|
|
9
|
-
|
|
10
|
-
"lonlat wgs",
|
|
11
|
-
"lonlat",
|
|
12
|
-
"longlat",
|
|
13
|
-
"lon lat",
|
|
14
|
-
] + COMMON_COORDS_LABELS
|
|
15
|
-
return header_score(header, words_combinations_list)
|
|
23
|
+
return header_score(header, words)
|
|
@@ -93,12 +93,15 @@ def detect_formats(
|
|
|
93
93
|
"code_departement",
|
|
94
94
|
"code_commune_insee",
|
|
95
95
|
"code_postal",
|
|
96
|
+
"code_fantoir",
|
|
96
97
|
"latitude_wgs",
|
|
97
98
|
"longitude_wgs",
|
|
98
99
|
"latitude_wgs_fr_metropole",
|
|
99
100
|
"longitude_wgs_fr_metropole",
|
|
100
101
|
"latitude_l93",
|
|
101
102
|
"longitude_l93",
|
|
103
|
+
"siren",
|
|
104
|
+
"siret",
|
|
102
105
|
]
|
|
103
106
|
if f in scores_table.index
|
|
104
107
|
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: csv-detective
|
|
3
|
-
Version: 0.9.3.dev2140
|
|
3
|
+
Version: 0.9.3.dev2215
|
|
4
4
|
Summary: Detect tabular files column content
|
|
5
5
|
Author-email: Etalab <opendatateam@data.gouv.fr>
|
|
6
6
|
License: MIT
|
|
@@ -25,7 +25,6 @@ Requires-Dist: rstr==3.2.2
|
|
|
25
25
|
Provides-Extra: dev
|
|
26
26
|
Requires-Dist: pytest>=8.3.0; extra == "dev"
|
|
27
27
|
Requires-Dist: responses>=0.25.0; extra == "dev"
|
|
28
|
-
Requires-Dist: bumpx>=0.3.10; extra == "dev"
|
|
29
28
|
Requires-Dist: ruff>=0.9.3; extra == "dev"
|
|
30
29
|
Dynamic: license-file
|
|
31
30
|
|
|
@@ -220,32 +219,26 @@ ruff check --fix .
|
|
|
220
219
|
ruff format .
|
|
221
220
|
```
|
|
222
221
|
|
|
223
|
-
|
|
222
|
+
### 🏷️ Release
|
|
224
223
|
|
|
225
|
-
The release process uses `
|
|
224
|
+
The release process uses the [`tag_version.sh`](tag_version.sh) script to create git tags and update [CHANGELOG.md](CHANGELOG.md) and [pyproject.toml](pyproject.toml) automatically.
|
|
226
225
|
|
|
227
|
-
```
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
### Process
|
|
232
|
-
|
|
233
|
-
1. `bumpx` will handle bumping the version according to your command (patch, minor, major)
|
|
234
|
-
2. It will update the CHANGELOG according to the new version being published
|
|
235
|
-
3. It will push a tag with the given version to github
|
|
236
|
-
4. CircleCI will pickup this tag, build the package and publish it to pypi
|
|
237
|
-
5. `bumpx` will have everything ready for the next version (version, changelog...)
|
|
226
|
+
```bash
|
|
227
|
+
# Create a new release
|
|
228
|
+
./tag_version.sh <version>
|
|
238
229
|
|
|
239
|
-
|
|
230
|
+
# Example
|
|
231
|
+
./tag_version.sh 2.5.0
|
|
240
232
|
|
|
241
|
-
|
|
242
|
-
|
|
233
|
+
# Dry run to see what would happen
|
|
234
|
+
./tag_version.sh 2.5.0 --dry-run
|
|
243
235
|
```
|
|
244
236
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
This will release a patch version:
|
|
237
|
+
**Prerequisites**: GitHub CLI (`gh`) must be installed and authenticated, and you must be on the main branch with a clean working directory.
|
|
248
238
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
239
|
+
The script automatically:
|
|
240
|
+
- Updates the version in pyproject.toml
|
|
241
|
+
- Extracts commits since the last tag and formats them for CHANGELOG.md
|
|
242
|
+
- Identifies breaking changes (commits with `!:` in the subject)
|
|
243
|
+
- Creates a git tag and pushes it to the remote repository
|
|
244
|
+
- Creates a GitHub release with the changelog content
|
|
@@ -97,8 +97,8 @@ csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=AI9nqj3zm6_vyc
|
|
|
97
97
|
csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=4Crk045ZD_tVovI7C-IqjKFz23Ej5-hrFkhZK4OilqA,258
|
|
98
98
|
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=N7LzmtNwZERgrwMy3EFHaVBpdiwkt2_9Tt7XVJLff6U,406
|
|
99
99
|
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=ZWhc8S9L1X2fFh2g5Ja-LuhsfHg_lALKrur6yDnGDPk,238
|
|
100
|
-
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=
|
|
101
|
-
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256
|
|
100
|
+
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=cGzc9HPzbWlffkzJgwujUqupLi1Pkm0HWBLZv-_c4to,402
|
|
101
|
+
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Av8IsLre6pRnPj-AHtqaU-1C_TMCxgDYAbTGIW0XIdU,339
|
|
102
102
|
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=4jIZ9cmN73XhP4ayGcEMcB_y0X45oRk1Lq2p_pNfgok,426
|
|
103
103
|
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=5L6JowK9y6y9uZNg6hWzknMSzh0SurkwQeTINNKTdYY,599
|
|
104
104
|
csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -110,9 +110,9 @@ csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=biUZP
|
|
|
110
110
|
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=biUZP8gAsVpjXLTx1WeS19qR4ia0pzpi6R69wJgu4B0,348
|
|
111
111
|
csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=On8VOCDD0EspZra6fTQCXH4MYao2xmRu-o7xWcab7Jg,355
|
|
112
112
|
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=ME_KjniqDSdAwXP7XnKXyr5IA75KrGSLIhvPNfsux6E,664
|
|
113
|
-
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=
|
|
113
|
+
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=CegBNN-RR1k-I0OU7ZsdlpVI5UBYDcj5QDX9KaWay-w,701
|
|
114
114
|
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
|
|
115
|
-
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=
|
|
115
|
+
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=ZmBLiCyboJzpsbXa5fsTxvAbO0W-ukRXnRWemN-Z-wc,481
|
|
116
116
|
csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
117
117
|
csv_detective/detect_labels/other/booleen/__init__.py,sha256=zEkarex7L4T3vmYjR5hdhtnhugTVDsvkgG_it6nN0aA,214
|
|
118
118
|
csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
|
|
@@ -131,7 +131,7 @@ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
|
|
|
131
131
|
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
132
132
|
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
133
133
|
csv_detective/detection/engine.py,sha256=NpWUgqsNXogBnVclPYccqJZVtDd780houVY-YIMr5c0,1511
|
|
134
|
-
csv_detective/detection/formats.py,sha256=
|
|
134
|
+
csv_detective/detection/formats.py,sha256=VxLHyQNUb7SrBkS1uV6cTK7cSrCVgrpAd3nd_74s2B0,7775
|
|
135
135
|
csv_detective/detection/headers.py,sha256=hvYU13Nq8GWci5skc5vVUOxM0DwOUwbjVMlmY94lWhA,1135
|
|
136
136
|
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
137
137
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
@@ -149,7 +149,7 @@ csv_detective/parsing/csv.py,sha256=fJkjKvyk7InkNnYKtmivyi48mmcwvrha7gvZ5J4-86A,
|
|
|
149
149
|
csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
|
|
150
150
|
csv_detective/parsing/load.py,sha256=-pQlwOPTYVpvgt21ERa4K9ObcLozWBJbZ3kWO1U0wkE,3648
|
|
151
151
|
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
152
|
-
csv_detective-0.9.3.dev2140.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
152
|
+
csv_detective-0.9.3.dev2215.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
153
153
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
154
154
|
tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
|
|
155
155
|
tests/test_fields.py,sha256=R6r6dcUwPx9XWIoc1xH4z0HlCnTj_bmxw91H5Gfqq5I,13762
|
|
@@ -159,8 +159,8 @@ tests/test_structure.py,sha256=GRDYKy0UcdqlN4qglzsRC0puFj5cb-SVvONjvcPvtAA,1400
|
|
|
159
159
|
tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
|
|
160
160
|
venv/bin/activate_this.py,sha256=wS7qPipy8R-dS_0ICD8PqqUQ8F-PrtcpiJw2DUPngYM,1287
|
|
161
161
|
venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
|
|
162
|
-
csv_detective-0.9.3.
|
|
163
|
-
csv_detective-0.9.3.
|
|
164
|
-
csv_detective-0.9.3.
|
|
165
|
-
csv_detective-0.9.3.
|
|
166
|
-
csv_detective-0.9.3.
|
|
162
|
+
csv_detective-0.9.3.dev2215.dist-info/METADATA,sha256=9F6hwttFFsBbi0eMv_UChawcvho2C9wHug4H_QEUIsQ,9931
|
|
163
|
+
csv_detective-0.9.3.dev2215.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
164
|
+
csv_detective-0.9.3.dev2215.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
165
|
+
csv_detective-0.9.3.dev2215.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
|
|
166
|
+
csv_detective-0.9.3.dev2215.dist-info/RECORD,,
|
|
File without changes
|
{csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.9.3.dev2140.dist-info → csv_detective-0.9.3.dev2215.dist-info}/top_level.txt
RENAMED
|
File without changes
|