csv-detective 0.9.3.dev2140__py3-none-any.whl → 0.9.3.dev2215__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,5 +12,6 @@ def _is(header: str) -> float:
12
12
  "siren organisme",
13
13
  "siren titulaire",
14
14
  "numero siren",
15
+ "epci",
15
16
  ]
16
17
  return header_score(header, words_combinations_list)
@@ -11,5 +11,6 @@ def _is(header: str) -> float:
11
11
  "siretacheteur",
12
12
  "n° siret",
13
13
  "coll siret",
14
+ "epci",
14
15
  ]
15
16
  return header_score(header, words_combinations_list)
@@ -3,44 +3,37 @@ from csv_detective.parsing.text import header_score
3
3
  PROPORTION = 0.5
4
4
 
5
5
  COMMON_COORDS_LABELS = [
6
- "c geo",
7
- "code geo",
8
- "coord gps",
6
+ "ban",
7
+ "coordinates",
9
8
  "coordonnees",
10
- "coordonnees ban",
11
- "coordonnees finales",
12
- "coordonnees geo",
13
- "coordonnees geographiques",
14
- "coordonnees geoloc",
15
- "coordonnees geoloc",
16
- "coordonnees gps",
17
9
  "coordonnees insee",
18
- "coordonnees xy",
19
10
  "geo",
20
- "geo coordinates",
21
- "geo cp",
22
- "geo localisation",
23
- "geo point",
24
- "geo point 2d",
25
- "geolocalisation",
26
- "geom x y",
27
- "geometry x y",
28
11
  "geopoint",
29
- "point geo",
30
- "point geo insee",
12
+ "geoloc",
13
+ "geolocalisation",
14
+ "geom",
15
+ "geometry",
16
+ "gps",
17
+ "localisation",
18
+ "point",
31
19
  "position",
32
- "position geographique",
33
20
  "wgs84",
21
+ ]
22
+
23
+ specific = [
24
+ "latlon",
25
+ "lat lon",
34
26
  "x y",
35
27
  "xy",
36
28
  ]
37
29
 
30
+ # we aim wide to catch exact matches if possible for the highest possible score
31
+ words = (
32
+ COMMON_COORDS_LABELS
33
+ + specific
34
+ + [w + sep + suf for suf in specific for w in COMMON_COORDS_LABELS for sep in ["", " "]]
35
+ )
36
+
38
37
 
39
38
  def _is(header: str) -> float:
40
- words_combinations_list = [
41
- "latlon wgs",
42
- "latlon",
43
- "latlong",
44
- "lat lon",
45
- ] + COMMON_COORDS_LABELS
46
- return header_score(header, words_combinations_list)
39
+ return header_score(header, words)
@@ -4,12 +4,20 @@ from ..latlon_wgs import COMMON_COORDS_LABELS
4
4
 
5
5
  PROPORTION = 0.5
6
6
 
7
+ specific = [
8
+ "lonlat",
9
+ "lon lat",
10
+ "y x",
11
+ "yx",
12
+ ]
13
+
14
+ # we aim wide to catch exact matches if possible for the highest possible score
15
+ words = (
16
+ COMMON_COORDS_LABELS
17
+ + specific
18
+ + [w + sep + suf for suf in specific for w in COMMON_COORDS_LABELS for sep in ["", " "]]
19
+ )
20
+
7
21
 
8
22
  def _is(header: str) -> float:
9
- words_combinations_list = [
10
- "lonlat wgs",
11
- "lonlat",
12
- "longlat",
13
- "lon lat",
14
- ] + COMMON_COORDS_LABELS
15
- return header_score(header, words_combinations_list)
23
+ return header_score(header, words)
@@ -93,12 +93,15 @@ def detect_formats(
93
93
  "code_departement",
94
94
  "code_commune_insee",
95
95
  "code_postal",
96
+ "code_fantoir",
96
97
  "latitude_wgs",
97
98
  "longitude_wgs",
98
99
  "latitude_wgs_fr_metropole",
99
100
  "longitude_wgs_fr_metropole",
100
101
  "latitude_l93",
101
102
  "longitude_l93",
103
+ "siren",
104
+ "siret",
102
105
  ]
103
106
  if f in scores_table.index
104
107
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv-detective
3
- Version: 0.9.3.dev2140
3
+ Version: 0.9.3.dev2215
4
4
  Summary: Detect tabular files column content
5
5
  Author-email: Etalab <opendatateam@data.gouv.fr>
6
6
  License: MIT
@@ -25,7 +25,6 @@ Requires-Dist: rstr==3.2.2
25
25
  Provides-Extra: dev
26
26
  Requires-Dist: pytest>=8.3.0; extra == "dev"
27
27
  Requires-Dist: responses>=0.25.0; extra == "dev"
28
- Requires-Dist: bumpx>=0.3.10; extra == "dev"
29
28
  Requires-Dist: ruff>=0.9.3; extra == "dev"
30
29
  Dynamic: license-file
31
30
 
@@ -220,32 +219,26 @@ ruff check --fix .
220
219
  ruff format .
221
220
  ```
222
221
 
223
- ## Release
222
+ ### 🏷️ Release
224
223
 
225
- The release process uses `bumpx`.
224
+ The release process uses the [`tag_version.sh`](tag_version.sh) script to create git tags and update [CHANGELOG.md](CHANGELOG.md) and [pyproject.toml](pyproject.toml) automatically.
226
225
 
227
- ```shell
228
- pip install -e .[dev]
229
- ```
230
-
231
- ### Process
232
-
233
- 1. `bumpx` will handle bumping the version according to your command (patch, minor, major)
234
- 2. It will update the CHANGELOG according to the new version being published
235
- 3. It will push a tag with the given version to github
236
- 4. CircleCI will pickup this tag, build the package and publish it to pypi
237
- 5. `bumpx` will have everything ready for the next version (version, changelog...)
226
+ ```bash
227
+ # Create a new release
228
+ ./tag_version.sh <version>
238
229
 
239
- ### Dry run
230
+ # Example
231
+ ./tag_version.sh 2.5.0
240
232
 
241
- ```shell
242
- bumpx -d -v
233
+ # Dry run to see what would happen
234
+ ./tag_version.sh 2.5.0 --dry-run
243
235
  ```
244
236
 
245
- ### Release
246
-
247
- This will release a patch version:
237
+ **Prerequisites**: GitHub CLI (`gh`) must be installed and authenticated, and you must be on the main branch with a clean working directory.
248
238
 
249
- ```shell
250
- bumpx -v
251
- ```
239
+ The script automatically:
240
+ - Updates the version in pyproject.toml
241
+ - Extracts commits since the last tag and formats them for CHANGELOG.md
242
+ - Identifies breaking changes (commits with `!:` in the subject)
243
+ - Creates a git tag and pushes it to the remote repository
244
+ - Creates a GitHub release with the changelog content
@@ -97,8 +97,8 @@ csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=AI9nqj3zm6_vyc
97
97
  csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=4Crk045ZD_tVovI7C-IqjKFz23Ej5-hrFkhZK4OilqA,258
98
98
  csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=N7LzmtNwZERgrwMy3EFHaVBpdiwkt2_9Tt7XVJLff6U,406
99
99
  csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=ZWhc8S9L1X2fFh2g5Ja-LuhsfHg_lALKrur6yDnGDPk,238
100
- csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=g7Y7IvW9VKO528z1MSPxfFtRB7kQXSiG7QQ-VZRfFEk,386
101
- csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=-gvdxUnv3LRfje60ljC4F3B2c1LBcWfV3zZbV3VJZ08,323
100
+ csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=cGzc9HPzbWlffkzJgwujUqupLi1Pkm0HWBLZv-_c4to,402
101
+ csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Av8IsLre6pRnPj-AHtqaU-1C_TMCxgDYAbTGIW0XIdU,339
102
102
  csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=4jIZ9cmN73XhP4ayGcEMcB_y0X45oRk1Lq2p_pNfgok,426
103
103
  csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=5L6JowK9y6y9uZNg6hWzknMSzh0SurkwQeTINNKTdYY,599
104
104
  csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -110,9 +110,9 @@ csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=biUZP
110
110
  csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=biUZP8gAsVpjXLTx1WeS19qR4ia0pzpi6R69wJgu4B0,348
111
111
  csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=On8VOCDD0EspZra6fTQCXH4MYao2xmRu-o7xWcab7Jg,355
112
112
  csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=ME_KjniqDSdAwXP7XnKXyr5IA75KrGSLIhvPNfsux6E,664
113
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=tDndlFyEM7qKS3ATxp0Xs0FsPsOPpRWhDe1ockbWw8s,923
113
+ csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=CegBNN-RR1k-I0OU7ZsdlpVI5UBYDcj5QDX9KaWay-w,701
114
114
  csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
115
- csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=7gbumJFp5xhz4GZ4uTAJQoxw5D53WJZddptyANmdEws,346
115
+ csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=ZmBLiCyboJzpsbXa5fsTxvAbO0W-ukRXnRWemN-Z-wc,481
116
116
  csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
117
117
  csv_detective/detect_labels/other/booleen/__init__.py,sha256=zEkarex7L4T3vmYjR5hdhtnhugTVDsvkgG_it6nN0aA,214
118
118
  csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
@@ -131,7 +131,7 @@ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
131
131
  csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
132
132
  csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
133
133
  csv_detective/detection/engine.py,sha256=NpWUgqsNXogBnVclPYccqJZVtDd780houVY-YIMr5c0,1511
134
- csv_detective/detection/formats.py,sha256=QXdxdECU5uC_ytLBT_6-xe0VAiaMptXF4KYiShRUVCA,7702
134
+ csv_detective/detection/formats.py,sha256=VxLHyQNUb7SrBkS1uV6cTK7cSrCVgrpAd3nd_74s2B0,7775
135
135
  csv_detective/detection/headers.py,sha256=hvYU13Nq8GWci5skc5vVUOxM0DwOUwbjVMlmY94lWhA,1135
136
136
  csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
137
137
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
@@ -149,7 +149,7 @@ csv_detective/parsing/csv.py,sha256=fJkjKvyk7InkNnYKtmivyi48mmcwvrha7gvZ5J4-86A,
149
149
  csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
150
150
  csv_detective/parsing/load.py,sha256=-pQlwOPTYVpvgt21ERa4K9ObcLozWBJbZ3kWO1U0wkE,3648
151
151
  csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
152
- csv_detective-0.9.3.dev2140.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
152
+ csv_detective-0.9.3.dev2215.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
153
153
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
154
154
  tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
155
155
  tests/test_fields.py,sha256=R6r6dcUwPx9XWIoc1xH4z0HlCnTj_bmxw91H5Gfqq5I,13762
@@ -159,8 +159,8 @@ tests/test_structure.py,sha256=GRDYKy0UcdqlN4qglzsRC0puFj5cb-SVvONjvcPvtAA,1400
159
159
  tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
160
160
  venv/bin/activate_this.py,sha256=wS7qPipy8R-dS_0ICD8PqqUQ8F-PrtcpiJw2DUPngYM,1287
161
161
  venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
162
- csv_detective-0.9.3.dev2140.dist-info/METADATA,sha256=kAuk6tI5cOB7zLgqjzVki_fDHUhH7lrFtu1fxXra1o4,9736
163
- csv_detective-0.9.3.dev2140.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
164
- csv_detective-0.9.3.dev2140.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
165
- csv_detective-0.9.3.dev2140.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
166
- csv_detective-0.9.3.dev2140.dist-info/RECORD,,
162
+ csv_detective-0.9.3.dev2215.dist-info/METADATA,sha256=9F6hwttFFsBbi0eMv_UChawcvho2C9wHug4H_QEUIsQ,9931
163
+ csv_detective-0.9.3.dev2215.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
164
+ csv_detective-0.9.3.dev2215.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
165
+ csv_detective-0.9.3.dev2215.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
166
+ csv_detective-0.9.3.dev2215.dist-info/RECORD,,